pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -1
- pyeasyphd/main/__init__.py +0 -4
- pyeasyphd/main/basic_input.py +7 -63
- pyeasyphd/main/python_run_md.py +3 -3
- pyeasyphd/main/python_run_tex.py +1 -1
- pyeasyphd/pyeasyphd.sublime-settings +2 -160
- pyeasyphd/tools/__init__.py +1 -16
- pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
- pyeasyphd/tools/generate/generate_html.py +122 -0
- pyeasyphd/tools/generate/generate_library.py +188 -0
- pyeasyphd/tools/generate/generate_links.py +13 -4
- pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
- pyeasyphd/tools/search/search_base.py +8 -5
- pyeasyphd/tools/search/search_core.py +4 -3
- pyeasyphd/tools/search/search_keywords.py +1 -1
- pyeasyphd/tools/search/search_writers.py +8 -5
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
- pyeasyphd-0.1.2.dist-info/RECORD +27 -0
- pyeasyphd/bib/__init__.py +0 -1
- pyeasyphd/bib/bibtexbase/__init__.py +0 -7
- pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
- pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
- pyeasyphd/bib/bibtexparser/__init__.py +0 -47
- pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
- pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
- pyeasyphd/bib/bibtexparser/library.py +0 -207
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
- pyeasyphd/bib/bibtexparser/model.py +0 -481
- pyeasyphd/bib/bibtexparser/splitter.py +0 -151
- pyeasyphd/bib/core/__init__.py +0 -18
- pyeasyphd/bib/core/convert_library_to_library.py +0 -31
- pyeasyphd/bib/core/convert_library_to_str.py +0 -199
- pyeasyphd/bib/core/convert_str_to_library.py +0 -34
- pyeasyphd/bib/core/convert_str_to_str.py +0 -27
- pyeasyphd/main/python_run_bib.py +0 -73
- pyeasyphd/main/python_writers.py +0 -212
- pyeasyphd/tools/compare/compare_bibs.py +0 -234
- pyeasyphd/tools/experiments_base.py +0 -203
- pyeasyphd/tools/format_save_bibs.py +0 -178
- pyeasyphd/tools/replace/replace.py +0 -81
- pyeasyphd/tools/spider/process_spider_bib.py +0 -247
- pyeasyphd/tools/spider/process_spider_url.py +0 -75
- pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
- pyeasyphd-0.1.0.dist-info/RECORD +0 -80
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
|
@@ -1,64 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class ParsingException(Exception):
    """Base class for all errors raised while parsing.

    Instances are used as immutable values, and the default copy
    machinery (memo handling) fails for them. Both copy hooks therefore
    hand back the very same object instead of building a duplicate.
    """

    def __copy__(self):
        # Shallow copy: intentionally the identical instance.
        return self

    def __deepcopy__(self, memo):
        # Deep copy: ``memo`` is ignored, the identical instance is returned.
        return self
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class BlockAbortedException(ParsingException):
    """Raised when an invalid bibtex file causes a block to be aborted.

    :param abort_reason: Text describing why the block was aborted.
    :param end_index: Not provided (``None``) if the end of the file
        is reached.
    """

    def __init__(self, abort_reason: str, end_index: Optional[int] = None):
        # Both values are simply stored for the handler to inspect.
        self.end_index = end_index
        self.abort_reason = abort_reason
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class ParserStateException(ParsingException):
    """Signals that the parser put itself into an invalid state.

    :param message: Description of the invalid state; stored on the
        instance as ``message``.
    """

    def __init__(self, message: str):
        self.message = message
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class RegexMismatchException(ParserStateException):
    """Raised when regex matches are inconsistent, implying a bug in the parser.

    For example, raised when first match ``@string{``
    is not followed by an overlapping match ``}``.

    :param first_match: The match that was found first.
    :param expected_match: What should have followed ``first_match``.
    :param second_match: What actually followed ``first_match``.
    """

    def __init__(self, first_match, expected_match, second_match):
        self.first_match = first_match
        self.expected_match = expected_match
        self.second_match = second_match
        # The fragments below are concatenated by the compiler; each one
        # must carry its own trailing space so that sentences do not run
        # together (the comma fragment previously lacked one).
        super().__init__(
            f"Regex mismatch: {first_match} followed by {second_match}, "
            f"but expected {expected_match}.\n"
            "This is an python-bibtexparser internal error. "
            "Please report this issue at our issue tracker."
        )
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class PartialMiddlewareException(ParsingException):
    """Raised when a middleware could only be applied in part.

    :param reasons: One message per failure; they are joined with a
        ``=====`` separator line into the exception text.
    """

    def __init__(self, reasons: List[str]):
        separator = "\n\n=====\n\n"
        joined_reasons = separator.join(reasons)
        super().__init__(f"Middleware could not be fully applied: {joined_reasons}")
|
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
from typing import Dict, List, Union
|
|
2
|
-
|
|
3
|
-
from .model import (
|
|
4
|
-
Block,
|
|
5
|
-
DuplicateBlockKeyBlock,
|
|
6
|
-
Entry,
|
|
7
|
-
ExplicitComment,
|
|
8
|
-
ImplicitComment,
|
|
9
|
-
ParsingFailedBlock,
|
|
10
|
-
Preamble,
|
|
11
|
-
String,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
# TODO Use functools.lru_cache for library properties (which create lists when called)
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class Library(object):
    """A collection of parsed bibtex blocks.

    Blocks are kept in insertion order in ``_blocks``. ``Entry`` and
    ``String`` blocks are additionally indexed by key in two private dicts
    so that duplicate keys can be detected when blocks are added.
    """

    def __init__(self, blocks: Union[List[Block], None] = None):
        """Create a library, optionally pre-filled with ``blocks``."""
        self._blocks = []
        self._entries_by_key = dict()
        self._strings_by_key = dict()
        if blocks is not None:
            self.add(blocks)

    def add(self, blocks: Union[List[Block], Block], fail_on_duplicate_key: bool = False):
        """Add blocks to library.

        The adding is key-safe, i.e., it is made sure that no duplicate keys are added
        for the same type (i.e., String or Entry). Duplicates are silently replaced with
        a DuplicateBlockKeyBlock.

        :param blocks: Block or list of blocks to add.
        :param fail_on_duplicate_key:
            If True, raises ValueError if a block was replaced with a DuplicateBlockKeyBlock.
            The blocks (including the duplicate markers) have already been
            added to the library when the error is raised.
        :raises ValueError: If ``fail_on_duplicate_key`` is True and at least
            one duplicate key was found.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        _added_blocks = []
        for block in blocks:
            # This may replace block with a DuplicateBlockKeyBlock
            block = self._add_to_dicts(block)
            self._blocks.append(block)
            _added_blocks.append(block)

        if fail_on_duplicate_key:
            duplicate_keys = [
                added.key
                for original, added in zip(blocks, _added_blocks)
                if original is not added and isinstance(added, DuplicateBlockKeyBlock)
            ]

            if duplicate_keys:
                raise ValueError(
                    f"Duplicate keys found: {duplicate_keys}. "
                    "Duplicate entries have been added to the library as DuplicateBlockKeyBlock. "
                    "Use `library.failed_blocks` to access them. "
                )

    def remove(self, blocks: Union[List[Block], Block]):
        """Remove blocks from library.

        :param blocks: Block or list of blocks to remove.
        :raises ValueError: If block is not in library.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        for block in blocks:
            self._blocks.remove(block)
            # Keep the key indexes in sync with the block list.
            if isinstance(block, Entry):
                del self._entries_by_key[block.key]
            elif isinstance(block, String):
                del self._strings_by_key[block.key]

    def convert(self, name: str):
        """Convert the library to special library.

        Convert the library so that it only contains blocks whose class
        name equals ``name`` (compared case-insensitively), such as
        `entry`, 'string', ...

        :param name: Class name of the blocks to keep.
        """
        wanted = name.lower()
        # Iterate over a snapshot: ``remove`` mutates ``self._blocks``, and
        # removing from a list while iterating it skips the element that
        # follows each removed one, leaving unwanted blocks behind.
        for block in list(self._blocks):
            if block.__class__.__name__.lower() != wanted:
                self.remove(block)

    def replace(self, old_block: Block, new_block: Block, fail_on_duplicate_key: bool = True):
        """Replace a block with another block, at the same position.

        :param old_block: Block to replace.
        :param new_block: Block to replace with.
        :param fail_on_duplicate_key: If False, adds a DuplicateBlockKeyBlock if
            a block with new_block.key (other than old_block) already exists.
        :raises ValueError: If old_block is not in library or if fail_on_duplicate_key is True
            and a block with new_block.key (other than old_block) already exists.
        """
        try:
            index = self._blocks.index(old_block)
            self.remove(old_block)
        except ValueError:
            raise ValueError("Block to replace is not in library.")

        block_after_add = self._add_to_dicts(new_block)
        self._blocks.insert(index, block_after_add)

        if (
            new_block is not block_after_add
            and isinstance(block_after_add, DuplicateBlockKeyBlock)
            and fail_on_duplicate_key
        ):
            # Revert changes to old_block
            # Don't fail on duplicate key, as this would lead to an infinite recursion
            # (should never happen for a clean library, but could happen if the user
            # tampered with the internals of the library).
            self.replace(block_after_add, old_block, fail_on_duplicate_key=False)
            raise ValueError("Duplicate key found.")

    @staticmethod
    def _cast_to_duplicate(
        prev_block_with_same_key: Union[Entry, String], duplicate: Union[Entry, String]
    ):
        """Wrap ``duplicate`` in a DuplicateBlockKeyBlock.

        :param prev_block_with_same_key: Block already registered under the key.
        :param duplicate: Newly seen block that has the same key.
        :returns: A DuplicateBlockKeyBlock referencing both blocks.
        """
        # The message is a single string (no trailing comma), so a failing
        # assert shows readable text instead of a one-element tuple.
        assert isinstance(prev_block_with_same_key, type(duplicate)) or isinstance(
            duplicate, type(prev_block_with_same_key)
        ), (
            "Internal BibtexParser Error. Duplicate blocks share no common type. "
            f"Found {type(prev_block_with_same_key)} and {type(duplicate)}, but both should be "
            "either instance of String or instance of Entry. "
            "Please report this issue at the bibtexparser issue tracker."
        )

        assert (
            prev_block_with_same_key.key == duplicate.key
        ), "Internal BibtexParser Error. Duplicate blocks have different keys."

        return DuplicateBlockKeyBlock(
            start_line=duplicate.start_line,
            raw=duplicate.raw,
            key=duplicate.key,
            previous_block=prev_block_with_same_key,
            duplicate_block=duplicate,
        )

    def _add_to_dicts(self, block):
        """Safely add block references to private dict structures.

        :param block: Block to add.
        :returns: The block that was added to the library. If a block
            of same type and with same key already existed, a
            DuplicateBlockKeyBlock is returned (not added to dict).
        """
        if isinstance(block, Entry):
            try:
                prev_block_with_same_key = self._entries_by_key[block.key]
                block = self._cast_to_duplicate(prev_block_with_same_key, block)
            except KeyError:
                # No duplicate found
                self._entries_by_key[block.key] = block
        elif isinstance(block, String):
            try:
                prev_block_with_same_key = self._strings_by_key[block.key]
                block = self._cast_to_duplicate(prev_block_with_same_key, block)
            except KeyError:
                # No duplicate found
                self._strings_by_key[block.key] = block
        return block

    @property
    def blocks(self) -> List[Block]:
        """All blocks in the library, preserving order of insertion."""
        return self._blocks

    @property
    def failed_blocks(self) -> List[ParsingFailedBlock]:
        """All blocks that could not be parsed, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, ParsingFailedBlock)]

    @property
    def strings(self) -> List[String]:
        """All @string blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, String)]

    @property
    def strings_dict(self) -> Dict[str, String]:
        """Dict representation of all @string blocks in the library (a copy)."""
        return self._strings_by_key.copy()

    @property
    def entries(self) -> List[Entry]:
        """All entry (@article, ...) blocks in the library, preserving order of insertion."""
        # Note: Taking this from the entries dict would be faster, but does not preserve order
        #  e.g. in cases where `replace` has been called.
        return [block for block in self._blocks if isinstance(block, Entry)]

    @property
    def entries_dict(self) -> Dict[str, Entry]:
        """Dict representation of all entry blocks in the library (a copy)."""
        return self._entries_by_key.copy()

    @property
    def preambles(self) -> List[Preamble]:
        """All @preamble blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, Preamble)]

    @property
    def comments(self) -> List[Union[ExplicitComment, ImplicitComment]]:
        """All comment blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, (ExplicitComment, ImplicitComment))]
|
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
from ..utils import generate_cite_key_prefix
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class AddArchive(BlockMiddleware):
    """Middleware that writes an `archive` field on every entry.

    The field value is whatever ``generate_cite_key_prefix`` returns for
    the entry and the lookup tables / JSON key names given at construction.
    """

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_abbr_inproceedings_dict: dict,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        # Stored unchanged; only forwarded to generate_cite_key_prefix.
        self.abbr_names_in_json = abbr_names_in_json
        self.full_names_in_json = full_names_in_json
        self.full_abbr_inproceedings_dict = full_abbr_inproceedings_dict
        self.full_abbr_article_dict = full_abbr_article_dict

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        archive_value = generate_cite_key_prefix(
            entry,
            self.full_abbr_article_dict,
            self.full_abbr_inproceedings_dict,
            self.full_names_in_json,
            self.abbr_names_in_json,
        )
        entry["archive"] = archive_value
        return entry
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
class AddJournalLongAbbr(BlockMiddleware):
    """Middleware that adds a `shortjournal` field derived from `journal`.

    Only entries whose type is ``article`` are touched. The journal name
    (with any parenthesised part removed) is matched against the full
    names in ``full_abbr_article_dict``; on exactly one match the
    corresponding abbreviation is stored in ``shortjournal``.
    """

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        self.abbr_names_in_json = abbr_names_in_json
        self.full_names_in_json = full_names_in_json
        self.full_abbr_article_dict = full_abbr_article_dict

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        return self.generate_journal_booktitle_long_abbr(entry)

    def generate_journal_booktitle_long_abbr(self, entry) -> Entry:
        """Set ``entry["shortjournal"]`` when exactly one abbreviation matches.

        :param entry: Entry to inspect and possibly extend.
        :returns: The same entry object.
        """
        # Only journal articles are handled.
        if entry.entry_type.lower() != "article":
            return entry

        publisher_table = self.full_abbr_article_dict

        # Flatten {publisher: {abbr: info}} into {abbr: info}.
        info_by_abbr = {}
        for publisher in publisher_table:
            per_publisher = publisher_table[publisher]
            for abbr in per_publisher:
                info_by_abbr[abbr] = per_publisher[abbr]

        journal_name = entry["journal"] if "journal" in entry else ""
        # Drop a parenthesised part before matching.
        journal_name = re.sub(r"\(.*\)", "", journal_name).strip()
        if not journal_name:
            return entry

        # Full names are used as case-insensitive regex patterns; both
        # pattern and subject are wrapped in braces before matching.
        wrapped_name = '{' + journal_name + '}'
        candidates = []
        for info in info_by_abbr.values():
            full_names = info.get(self.full_names_in_json, [])
            abbr_names = info.get(self.abbr_names_in_json, [])
            candidates.extend(
                long_abbr
                for full, long_abbr in zip(full_names, abbr_names)
                if re.match('{' + full + '}', wrapped_name, re.I)
            )

        # De-duplicate, then act only on an unambiguous result.
        long_abbr_list = list(set(candidates))
        if len(long_abbr_list) == 1:
            entry["shortjournal"] = long_abbr_list[0]
        elif len(long_abbr_list) > 1:
            print(f"Multiple match: {long_abbr_list} for {journal_name}.")
        return entry
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from ...library import Library
|
|
2
|
-
from ...model import Block, Entry
|
|
3
|
-
from ..middleware import BlockMiddleware
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class ConstrainNumberOfAuthors(BlockMiddleware):
    """Middleware that caps how many authors an entry lists.

    When the `author` field holds more than ``maximum_authors`` names
    (separated by `` and ``), the surplus names are replaced by a single
    ``others``.
    """

    def __init__(self, maximum_authors: int, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.maximum_authors = maximum_authors

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "author" not in entry:
            return entry

        separator = " and "
        names = entry["author"].split(separator)
        if len(names) > self.maximum_authors:
            # NOTE(review): the original indentation was not available; the
            # assignment is assumed to happen only when truncating - confirm.
            kept = names[:self.maximum_authors] + ["others"]
            entry["author"] = separator.join(kept)
        return entry
|
|
@@ -1,62 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class SimplifyDoiInEntry(BlockMiddleware):
    """Simplify the `doi` field by deleting `https://doi.org/` if present.

    Both the ``http`` and ``https`` forms of the resolver prefix are removed.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "doi" in entry:
            # ``s?`` matches exactly http/https (``s*`` also accepted
            # "httpsss"), and the dot is escaped so that it only matches a
            # literal "." rather than any character.
            entry["doi"] = re.sub(r"https?://doi\.org/", "", entry["doi"])
        return entry
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ChooseDoiOrUrlInEntry(BlockMiddleware):
    """Middleware that prefers the doi over the url.

    If an entry carries a non-empty `doi` and also a `url`, the `url`
    field is deleted.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        # An empty doi does not count as "having a doi".
        has_doi = "doi" in entry and len(entry["doi"]) != 0
        if has_doi and "url" in entry:
            del entry["url"]
        return entry
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class ChangeDoiToUrlInEntry(BlockMiddleware):
    """Change doi to url by adding `https://doi.org/` if missing, then delete doi."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "doi" not in entry:
            return entry

        doi_value = entry["doi"]
        if len(doi_value) != 0:
            # Values that already start with a scheme are used unchanged.
            already_url = re.match(r"https*://", doi_value)
            entry["url"] = doi_value if already_url else f"https://doi.org/{doi_value}"

        # NOTE(review): the original indentation was not available; the doi
        # is assumed to be deleted whenever the field exists - confirm.
        del entry["doi"]
        return entry
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
class HttpsUrlInEntry(BlockMiddleware):
    """Middleware that rewrites the scheme in the `url` field to https."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "url" not in entry:
            return entry

        # Every occurrence of the scheme pattern in the value is replaced.
        current_url = entry["url"]
        entry["url"] = re.sub(r"https*://", "https://", current_url)
        return entry
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
from typing import Dict, List, Set
|
|
3
|
-
|
|
4
|
-
from ...library import Library
|
|
5
|
-
from ...model import Entry, Field
|
|
6
|
-
from ..middleware import BlockMiddleware
|
|
7
|
-
|
|
8
|
-
logger = logging.getLogger(__name__)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class NormalizeEntryFieldKeys(BlockMiddleware):
    """Lowercase all field keys of an entry.

    When two keys collide after lowercasing (e.g. 'author' and 'Author'
    in the same entry), a warning is logged and the later field replaces
    the earlier one.

    Some other middlewares, such as `SeparateCoAuthors`, assume lowercase key names.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Entry:
        # Maps lowercase key -> field. A colliding key keeps the position of
        # its first occurrence but holds the last field seen.
        fields_by_key: Dict[str, Field] = {}
        for field in entry.fields:
            lowered: str = field.key.lower()
            if lowered in fields_by_key:
                # Report the collision together with the original spelling.
                message = (
                    f"NormalizeFieldKeys: in entry '{entry.key}': "
                    + f"duplicate normalized key '{lowered}' "
                    + f"(original '{field.key}'); overriding previous value"
                )
                logger.warning(message)
            field.key = lowered
            fields_by_key[lowered] = field

        entry.fields = list(fields_by_key.values())
        return entry
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
from typing import List
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class ReplaceFieldKeyInEntry(BlockMiddleware):
    """Middleware that renames field keys for one entry type.

    ``old_field_keys`` and ``new_field_keys`` are paired up positionally;
    for entries whose type equals ``entry_type`` each present old key is
    moved to the corresponding new key.
    """

    def __init__(
        self,
        entry_type: str,
        old_field_keys: List[str],
        new_field_keys: List[str],
        allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.new_field_keys = new_field_keys
        self.old_field_keys = old_field_keys
        self.entry_type = entry_type

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        # Entries of any other type pass through untouched.
        if self.entry_type != entry.entry_type:
            return entry

        for old_key, new_key in zip(self.old_field_keys, self.new_field_keys):
            # Nothing to do for identical names or for keys the entry lacks.
            if old_key == new_key or old_key not in entry:
                continue
            entry[new_key] = entry[old_key]
            del entry[old_key]
        return entry
|