pyeasyphd 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +0 -0
- pyeasyphd/bib/__init__.py +1 -0
- pyeasyphd/bib/bibtexbase/__init__.py +7 -0
- pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
- pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
- pyeasyphd/bib/bibtexparser/__init__.py +47 -0
- pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
- pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
- pyeasyphd/bib/bibtexparser/library.py +207 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
- pyeasyphd/bib/bibtexparser/model.py +481 -0
- pyeasyphd/bib/bibtexparser/splitter.py +151 -0
- pyeasyphd/bib/core/__init__.py +18 -0
- pyeasyphd/bib/core/convert_library_to_library.py +31 -0
- pyeasyphd/bib/core/convert_library_to_str.py +199 -0
- pyeasyphd/bib/core/convert_str_to_library.py +34 -0
- pyeasyphd/bib/core/convert_str_to_str.py +27 -0
- pyeasyphd/main/__init__.py +17 -0
- pyeasyphd/main/basic_input.py +149 -0
- pyeasyphd/main/pandoc_md_to.py +361 -0
- pyeasyphd/main/python_run_bib.py +73 -0
- pyeasyphd/main/python_run_md.py +235 -0
- pyeasyphd/main/python_run_tex.py +149 -0
- pyeasyphd/main/python_writers.py +212 -0
- pyeasyphd/pyeasyphd.py +72 -0
- pyeasyphd/pyeasyphd.sublime-settings +235 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/tools/__init__.py +30 -0
- pyeasyphd/tools/compare/compare_bibs.py +234 -0
- pyeasyphd/tools/experiments_base.py +203 -0
- pyeasyphd/tools/format_save_bibs.py +178 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
- pyeasyphd/tools/generate/generate_links.py +356 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
- pyeasyphd/tools/replace/replace.py +81 -0
- pyeasyphd/tools/search/data.py +318 -0
- pyeasyphd/tools/search/search_base.py +118 -0
- pyeasyphd/tools/search/search_core.py +326 -0
- pyeasyphd/tools/search/search_keywords.py +227 -0
- pyeasyphd/tools/search/search_writers.py +288 -0
- pyeasyphd/tools/search/utils.py +152 -0
- pyeasyphd/tools/spider/process_spider_bib.py +247 -0
- pyeasyphd/tools/spider/process_spider_url.py +74 -0
- pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
- pyeasyphd/utils/utils.py +62 -0
- pyeasyphd-0.0.2.dist-info/METADATA +27 -0
- pyeasyphd-0.0.2.dist-info/RECORD +80 -0
- pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
from typing import List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
from .standardize.do_on_bib import ObtainMarkBlocksDict, SplitBibAccordingToMark
|
|
4
|
+
from .standardize.do_on_comment_block import StandardizeCommentBlock
|
|
5
|
+
from .standardize.do_on_entry_block import StandardizeEntryBlock
|
|
6
|
+
from .standardize.do_on_preamble_block import StandardizePreambleBlock
|
|
7
|
+
from .standardize.do_on_string_block import StandardizeStringBlock
|
|
8
|
+
|
|
9
|
+
# Each row is [mark, block kind, short code]:
#   mark       - name of the BibTeX block type (lower case)
#   block kind - selects which standardizer handles the block
#   short code - abbreviation; not read by the code visible in this module
MARKS_FLAGS = [
    # Non-entry blocks
    ["comment", "comment", "C"],
    ["string", "string", "S"],
    ["preamble", "preamble", "P"],
    # Entry blocks
    ["article", "entry", "J"],
    ["inproceedings", "entry", "C"],
    ["proceedings", "entry", "B"],
    ["book", "entry", "B"],
    ["incollection", "entry", "BS"],
    ["misc", "entry", "D"],
    ["unpublished", "entry", "M"],
    ["techreport", "entry", "R"],
    ["phdthesis", "entry", "T_D"],
    ["mastersthesis", "entry", "T_M"],
]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class StandardizeBib(object):
    """Standardize bib.

    Args:
        default_additional_field_list (Optional[List[str]] = None): Additional default
            fields, forwarded to the entry-block standardizer. ``None`` means no
            additional fields.
    """

    def __init__(self, default_additional_field_list: Optional[List[str]] = None) -> None:
        # A fresh list per instance avoids sharing one mutable default between instances.
        if default_additional_field_list is None:
            default_additional_field_list = []

        self._standardize_comment_block = StandardizeCommentBlock()
        self._standardize_entry_block = StandardizeEntryBlock(default_additional_field_list)
        self._standardize_preamble_block = StandardizePreambleBlock()
        self._standardize_string_block = StandardizeStringBlock()

    def standardize(self, data_list: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Generate standard bib.

        Args:
            data_list (List[str]): Bib data.

        Returns:
            Tuple[List[str], List[List[str]]]: The standardized bib lines, and the
                implicit comments collected while splitting and standardizing.
        """
        # Initialize: re-split the text into physical lines and drop blank ones
        data_list = "".join(data_list).splitlines(keepends=True)
        data_list = [line for line in data_list if line.strip()]

        # Split data according to mark pattern
        data_list = SplitBibAccordingToMark().split_marks(data_list)

        new_data_list: List[str] = []
        implicit_comment_list: List[List[str]] = []

        # Generate dict (mark -> blocks)
        mark_blocks_dict, temp_implicit_comment_list = ObtainMarkBlocksDict().obtain_dict(data_list, True)
        implicit_comment_list.extend(temp_implicit_comment_list)

        # mark -> block kind; the first row wins, matching the former list.index lookup
        flag_by_mark = {}
        for row in MARKS_FLAGS:
            flag_by_mark.setdefault(row[0], row[1])

        if not_in := {k: v for k, v in mark_blocks_dict.items() if k not in flag_by_mark}:
            print(f"Warning: Not standard parts - {not_in}")

        for mark in mark_blocks_dict:
            if mark not in flag_by_mark:
                continue

            # Attribute lookup replaces the former eval() on a formatted string
            standardizer = getattr(self, f"_standardize_{flag_by_mark[mark]}_block")
            for block in mark_blocks_dict[mark]:
                block, temp = standardizer.standardize(block)
                new_data_list.extend(block)
                implicit_comment_list.extend(temp)

        return new_data_list, implicit_comment_list
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Initialization.
|
|
2
|
+
|
|
3
|
+
This submodule incorporates modified source code from the python-bibtexparser project
|
|
4
|
+
(https://github.com/sciunto-org/python-bibtexparser), which is licensed under the MIT License.
|
|
5
|
+
The original copyright notice and license terms have been preserved in accordance with the license requirements.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
__all__ = [
|
|
9
|
+
"Block",
|
|
10
|
+
"Field",
|
|
11
|
+
"Entry",
|
|
12
|
+
"ImplicitComment",
|
|
13
|
+
"ExplicitComment",
|
|
14
|
+
"String",
|
|
15
|
+
"Preamble",
|
|
16
|
+
"ParsingFailedBlock",
|
|
17
|
+
"DuplicateBlockKeyBlock",
|
|
18
|
+
"Library",
|
|
19
|
+
|
|
20
|
+
"MiddlewaresStrToStr",
|
|
21
|
+
"MiddlewaresStrToLibrary",
|
|
22
|
+
"MiddlewaresLibraryToLibrary",
|
|
23
|
+
"MiddlewaresLibraryToStr",
|
|
24
|
+
|
|
25
|
+
"Splitter",
|
|
26
|
+
|
|
27
|
+
"BibtexFormat",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
from .bibtex_format import BibtexFormat
|
|
31
|
+
from .library import Library
|
|
32
|
+
from .middlewares_library_to_library import MiddlewaresLibraryToLibrary
|
|
33
|
+
from .middlewares_library_to_str import MiddlewaresLibraryToStr
|
|
34
|
+
from .middlewares_str_to_library import MiddlewaresStrToLibrary
|
|
35
|
+
from .middlewares_str_to_str import MiddlewaresStrToStr
|
|
36
|
+
from .model import (
|
|
37
|
+
Block,
|
|
38
|
+
DuplicateBlockKeyBlock,
|
|
39
|
+
Entry,
|
|
40
|
+
ExplicitComment,
|
|
41
|
+
Field,
|
|
42
|
+
ImplicitComment,
|
|
43
|
+
ParsingFailedBlock,
|
|
44
|
+
Preamble,
|
|
45
|
+
String,
|
|
46
|
+
)
|
|
47
|
+
from .splitter import Splitter
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from typing import Union
|
|
2
|
+
|
|
3
|
+
# Default comment template for blocks that could not be parsed. `{n}` is a format
# placeholder (presumably the number of affected lines, filled in by the writer - confirm).
PARSING_FAILED_COMMENT = "% WARNING Parsing failed for the following {n} lines."
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BibtexFormat(object):
    """Formatting options (indentation, alignment, ...) used when writing a BibTeX file.

    Hint: For more manual, GUI-based formatting, see the `bibtex-tidy` tool:
    https://flamingtempura.github.io/bibtex-tidy/
    """

    def __init__(self):
        self._indent: str = " "  # "\t"
        self._align_field_values: Union[int, str] = "auto"
        self._block_separator: str = ""  # "\n\n"
        self._trailing_comma: bool = True
        self._parsing_failed_comment: str = PARSING_FAILED_COMMENT

    # ----- indent -----

    @property
    def indent(self) -> str:
        """Character(s) for indenting BibTeX field-value pairs. Default: single space."""
        return self._indent

    @indent.setter
    def indent(self, indent: str) -> None:
        self._indent = indent

    # ----- value_column -----

    @property
    def value_column(self) -> Union[int, str]:
        """Alignment of field- and string-values. Default: "auto".

        This impacts String and Entry blocks.

        An integer value x specifies that spaces should be added before the " = ",
        such that, if possible, the value is written at column `len(self.indent) + x`.
        Note that for long keys, the value may be written at a later column.

        Thus, a value of 0 means that the value is written directly after the " = ".

        The special value "auto" specifies that the bibtex field value should be aligned
        based on the longest key in the library.
        """
        return self._align_field_values

    @value_column.setter
    def value_column(self, align_values: Union[int, str]) -> None:
        is_integer = isinstance(align_values, int)
        # Integers must be non-negative; the only accepted non-integer is "auto".
        if is_integer and align_values < 0:
            raise ValueError("align_field_values must be >= 0")
        if not is_integer and align_values != "auto":
            raise ValueError("align_field_values must be an integer or 'auto'")
        self._align_field_values = align_values

    # ----- block_separator -----

    @property
    def block_separator(self) -> str:
        """Character(s) for separating BibTeX entries.

        Default: empty string, i.e., no extra separator.
        """
        return self._block_separator

    @block_separator.setter
    def block_separator(self, entry_separator: str) -> None:
        self._block_separator = entry_separator

    # ----- trailing_comma -----

    @property
    def trailing_comma(self) -> bool:
        """Use the trailing comma syntax for BibTeX entries. Default: True.

        BibTeX syntax allows an optional comma at the end
        of the last field in an entry.
        """
        return self._trailing_comma

    @trailing_comma.setter
    def trailing_comma(self, trailing_comma: bool) -> None:
        self._trailing_comma = trailing_comma

    # ----- parsing_failed_comment -----

    @property
    def parsing_failed_comment(self) -> str:
        """Comment to use for blocks that could not be parsed."""
        return self._parsing_failed_comment

    @parsing_failed_comment.setter
    def parsing_failed_comment(self, parsing_failed_comment: str) -> None:
        self._parsing_failed_comment = parsing_failed_comment
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from typing import List, Optional
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class ParsingException(Exception):
    """Generic Exception for parsing errors.

    Instances are treated as immutable: copying or deep-copying one
    returns the very same object.
    """

    # ParsingExceptions are used as immutables, and the default memo-based
    # deepcopy fails for them, so both copy hooks hand back ``self``.

    def __copy__(self):
        return self

    def __deepcopy__(self, memo):
        return self
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BlockAbortedException(ParsingException):
    """Exception where a invalid bibtex file led to an aborted block.

    :param abort_reason: Why the block was aborted.
    :param end_index: Stored as given; not provided (``None``) if end of file is reached.
    """

    def __init__(self, abort_reason: str, end_index: Optional[int] = None):
        self.end_index = end_index
        self.abort_reason = abort_reason
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ParserStateException(ParsingException):
    """Parser is in a self-inflicted invalid state.

    :param message: Human-readable description of the invalid state.
    """

    def __init__(self, message: str):
        self.message = message

    def __str__(self) -> str:
        # ``Exception`` fills ``args`` from the constructor arguments of the concrete
        # class, which for subclasses need not be the message. Returning the stored
        # message keeps str() and tracebacks readable without touching ``args``.
        return str(self.message)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RegexMismatchException(ParserStateException):
    """Raised when regex matches are inconsistent, implying a bug in the parser.

    For example, raised when first match ``@string{``
    is not followed by an overlapping match ``}``.

    :param first_match: The match that was found first.
    :param expected_match: What was expected to follow ``first_match``.
    :param second_match: What actually followed ``first_match``.
    """

    def __init__(self, first_match, expected_match, second_match):
        self.first_match = first_match
        self.expected_match = expected_match
        self.second_match = second_match
        super().__init__(
            # The space after the comma was missing, giving "...,but expected"
            f"Regex mismatch: {first_match} followed by {second_match}, "
            f"but expected {expected_match}.\n"
            "This is an python-bibtexparser internal error. "
            "Please report this issue at our issue tracker."
        )
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class PartialMiddlewareException(ParsingException):
    """Exception raised when a middleware could not be fully applied.

    :param reasons: One description per failure; they are joined into a single
        message, separated by ``=====`` lines.
    """

    def __init__(self, reasons: List[str]):
        separator = "\n\n=====\n\n"
        super().__init__("Middleware could not be fully applied: " + separator.join(reasons))
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
from typing import Dict, List, Union
|
|
2
|
+
|
|
3
|
+
from .model import (
|
|
4
|
+
Block,
|
|
5
|
+
DuplicateBlockKeyBlock,
|
|
6
|
+
Entry,
|
|
7
|
+
ExplicitComment,
|
|
8
|
+
ImplicitComment,
|
|
9
|
+
ParsingFailedBlock,
|
|
10
|
+
Preamble,
|
|
11
|
+
String,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
# TODO Use functools.lru_cache for library properties (which create lists when called)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Library(object):
    """A collection of parsed bibtex blocks.

    Blocks are kept in insertion order. Entry and String blocks are additionally
    indexed by key, which is how duplicate keys are detected when blocks are added.
    """

    def __init__(self, blocks: Union[List[Block], None] = None):
        """Create a library, optionally pre-filled.

        :param blocks: Initial blocks; added through :meth:`add` when given.
        """
        self._blocks = []
        self._entries_by_key = dict()
        self._strings_by_key = dict()
        if blocks is not None:
            self.add(blocks)

    def add(self, blocks: Union[List[Block], Block], fail_on_duplicate_key: bool = False):
        """Add blocks to library.

        The adding is key-safe, i.e., it is made sure that no duplicate keys are added.
        for the same type (i.e., String or Entry). Duplicates are silently replaced with
        a DuplicateKeyBlock.

        :param blocks: Block or list of blocks to add.
        :param fail_on_duplicate_key:
            If True, raises ValueError if a block was replaced with a DuplicateKeyBlock.
        :raises ValueError: If ``fail_on_duplicate_key`` is True and at least one block
            was replaced. The blocks have already been added at that point.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        _added_blocks = []
        for block in blocks:
            # This may replace block with a DuplicateEntryKeyBlock
            block = self._add_to_dicts(block)
            self._blocks.append(block)
            _added_blocks.append(block)

        if fail_on_duplicate_key:
            duplicate_keys = []
            for original, added in zip(blocks, _added_blocks):
                if original is not added and isinstance(added, DuplicateBlockKeyBlock):
                    duplicate_keys.append(added.key)

            if len(duplicate_keys) > 0:
                raise ValueError(
                    f"Duplicate keys found: {duplicate_keys}. "
                    f"Duplicate entries have been added to the library as DuplicateBlockKeyBlock. "
                    f"Use `library.failed_blocks` to access them. "
                )

    def remove(self, blocks: Union[List[Block], Block]):
        """Remove blocks from library.

        :param blocks: Block or list of blocks to remove.
        :raises ValueError: If block is not in library.
        """
        if isinstance(blocks, Block):
            blocks = [blocks]

        for block in blocks:
            self._blocks.remove(block)
            # Keep the key indexes in sync with the block list
            if isinstance(block, Entry):
                del self._entries_by_key[block.key]
            elif isinstance(block, String):
                del self._strings_by_key[block.key]

    def convert(self, name: str):
        """Convert the library to special library.

        Convert the library to special library only contain block
        whose name is `name`, such as `entry`, 'string', ...

        :param name: Class name of the blocks to keep, compared case-insensitively.
        """
        wanted = name.lower()
        # Iterate over a snapshot: `remove` mutates `self._blocks`, and removing from
        # the list being iterated would skip the element following each removal.
        for block in list(self._blocks):
            if block.__class__.__name__.lower() != wanted:
                self.remove(block)

    def replace(self, old_block: Block, new_block: Block, fail_on_duplicate_key: bool = True):
        """Replace a block with another block, at the same position.

        :param old_block: Block to replace.
        :param new_block: Block to replace with.
        :param fail_on_duplicate_key: If False, adds a DuplicateKeyBlock if
            a block with new_block.key (other than old_block) already exists.
        :raises ValueError: If old_block is not in library or if fail_on_duplicate_key is True
            and a block with new_block.key (other than old_block) already exists.
        """
        try:
            index = self._blocks.index(old_block)
            self.remove(old_block)
        except ValueError:
            raise ValueError("Block to replace is not in library.")

        block_after_add = self._add_to_dicts(new_block)
        self._blocks.insert(index, block_after_add)

        if (
            new_block is not block_after_add
            and isinstance(block_after_add, DuplicateBlockKeyBlock)
            and fail_on_duplicate_key
        ):
            # Revert changes to old_block
            # Don't fail on duplicate key, as this would lead to an infinite recursion
            # (should never happen for a clean library, but could happen if the user
            # tampered with the internals of the library).
            self.replace(block_after_add, old_block, fail_on_duplicate_key=False)
            raise ValueError("Duplicate key found.")

    @staticmethod
    def _cast_to_duplicate(
        prev_block_with_same_key: Union[Entry, String], duplicate: Union[Entry, String]
    ):
        """Wrap ``duplicate`` in a DuplicateBlockKeyBlock that references the earlier block.

        :param prev_block_with_same_key: Block already registered under the key.
        :param duplicate: Newly added block with the same key.
        :returns: A DuplicateBlockKeyBlock built from ``duplicate``.
        """
        # The message is a plain string; it used to be a 1-tuple because of a
        # trailing comma, and its sentences ran together without spaces.
        assert isinstance(prev_block_with_same_key, type(duplicate)) or isinstance(
            duplicate, type(prev_block_with_same_key)
        ), (
            "Internal BibtexParser Error. Duplicate blocks share no common type. "
            f"Found {type(prev_block_with_same_key)} and {type(duplicate)}, but both should be "
            "either instance of String or instance of Entry. "
            "Please report this issue at the bibtexparser issue tracker."
        )

        assert (
            prev_block_with_same_key.key == duplicate.key
        ), "Internal BibtexParser Error. Duplicate blocks have different keys."

        return DuplicateBlockKeyBlock(
            start_line=duplicate.start_line,
            raw=duplicate.raw,
            key=duplicate.key,
            previous_block=prev_block_with_same_key,
            duplicate_block=duplicate,
        )

    def _add_to_dicts(self, block):
        """Safely add block references to private dict structures.

        :param block: Block to add.
        :returns: The block that was added to the library. If a block
            of same type and with same key already existed, a
            DuplicateKeyBlock is returned (not added to dict).
        """
        if isinstance(block, Entry):
            try:
                prev_block_with_same_key = self._entries_by_key[block.key]
                block = self._cast_to_duplicate(prev_block_with_same_key, block)
            except KeyError:
                # No duplicate found
                self._entries_by_key[block.key] = block
        elif isinstance(block, String):
            try:
                prev_block_with_same_key = self._strings_by_key[block.key]
                block = self._cast_to_duplicate(prev_block_with_same_key, block)
            except KeyError:
                # No duplicate found
                self._strings_by_key[block.key] = block
        return block

    @property
    def blocks(self) -> List[Block]:
        """All blocks in the library, preserving order of insertion."""
        return self._blocks

    @property
    def failed_blocks(self) -> List[ParsingFailedBlock]:
        """All blocks that could not be parsed, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, ParsingFailedBlock)]

    @property
    def strings(self) -> List[String]:
        """All @string blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, String)]

    @property
    def strings_dict(self) -> Dict[str, String]:
        """Dict representation of all @string blocks in the library."""
        return self._strings_by_key.copy()

    @property
    def entries(self) -> List[Entry]:
        """All entry (@article, ...) blocks in the library, preserving order of insertion."""
        # Note: Taking this from the entries dict would be faster, but does not preserve order
        # e.g. in cases where `replace` has been called.
        return [block for block in self._blocks if isinstance(block, Entry)]

    @property
    def entries_dict(self) -> Dict[str, Entry]:
        """Dict representation of all entry blocks in the library."""
        return self._entries_by_key.copy()

    @property
    def preambles(self) -> List[Preamble]:
        """All @preamble blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, Preamble)]

    @property
    def comments(self) -> List[Union[ExplicitComment, ImplicitComment]]:
        """All comment blocks in the library, preserving order of insertion."""
        return [block for block in self._blocks if isinstance(block, (ExplicitComment, ImplicitComment))]
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from ...library import Library
|
|
4
|
+
from ...model import Block, Entry
|
|
5
|
+
from ..middleware import BlockMiddleware
|
|
6
|
+
from ..utils import generate_cite_key_prefix
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class AddArchive(BlockMiddleware):
    """Add Field `archive`.

    The field value is whatever `generate_cite_key_prefix` returns for the entry
    and the lookup data given at construction time.
    """

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_abbr_inproceedings_dict: dict,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        # Lookup data, handed unchanged to generate_cite_key_prefix
        self.full_abbr_article_dict = full_abbr_article_dict
        self.full_abbr_inproceedings_dict = full_abbr_inproceedings_dict
        # Key names used inside the lookup data
        self.full_names_in_json = full_names_in_json
        self.abbr_names_in_json = abbr_names_in_json

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        archive = generate_cite_key_prefix(
            entry,
            self.full_abbr_article_dict,
            self.full_abbr_inproceedings_dict,
            self.full_names_in_json,
            self.abbr_names_in_json
        )
        entry["archive"] = archive
        return entry
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AddJournalLongAbbr(BlockMiddleware):
    """Add long abbr for field `journal`.

    For `article` entries whose journal matches exactly one known full name,
    the corresponding abbreviation is written to the field `shortjournal`.
    """

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        self.full_abbr_article_dict = full_abbr_article_dict
        self.full_names_in_json = full_names_in_json
        self.abbr_names_in_json = abbr_names_in_json

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        return self.generate_journal_booktitle_long_abbr(entry)

    def generate_journal_booktitle_long_abbr(self, entry) -> Entry:
        """Set `shortjournal` on an article entry when its journal matches uniquely.

        Entries of any other type, or without journal content, are returned unchanged.
        """
        # Only for journal
        if entry.entry_type.lower() != "article":
            return entry
        full_abbr_dict = self.full_abbr_article_dict
        field_key = "journal"

        # Flatten publisher -> abbr -> info into abbr -> info (later publishers overwrite)
        info_by_abbr = {}
        for publisher in full_abbr_dict:
            per_publisher = full_abbr_dict[publisher]
            for abbr in per_publisher:
                info_by_abbr[abbr] = per_publisher[abbr]

        # Journal name with any parenthesised part removed
        raw_content = entry[field_key] if field_key in entry else ""
        field_content = re.sub(r"\(.*\)", "", raw_content).strip()
        if not field_content:
            return entry

        # Match the journal against every known full name (used as a regex, ignoring case)
        wrapped_content = '{' + field_content + '}'
        long_abbr_list = []
        for abbr in info_by_abbr:
            info = info_by_abbr[abbr]
            full_name_list = info.get(self.full_names_in_json, [])
            long_abbr_name_list = info.get(self.abbr_names_in_json, [])

            for full, long_abbr in zip(full_name_list, long_abbr_name_list):
                if re.match('{' + full + '}', wrapped_content, re.I):
                    long_abbr_list.append(long_abbr)

        # Only an unambiguous match is written back
        long_abbr_list = list(set(long_abbr_list))
        match_count = len(long_abbr_list)
        if match_count > 1:
            print(f"Multiple match: {long_abbr_list} for {field_content}.")
        elif match_count == 1:
            entry["shortjournal"] = long_abbr_list[0]
        return entry
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from ...library import Library
|
|
2
|
+
from ...model import Block, Entry
|
|
3
|
+
from ..middleware import BlockMiddleware
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ConstrainNumberOfAuthors(BlockMiddleware):
    """Constrain the number of authors.

    When an entry lists more than `maximum_authors` authors (separated by " and "),
    only the first `maximum_authors` are kept and "others" is appended.
    """

    def __init__(self, maximum_authors: int, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.maximum_authors = maximum_authors

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "author" not in entry:
            return entry

        names = entry["author"].split(" and ")
        if len(names) > self.maximum_authors:
            kept = names[:self.maximum_authors]
            kept.append("others")
            entry["author"] = " and ".join(kept)
        return entry
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from ...library import Library
|
|
4
|
+
from ...model import Block, Entry
|
|
5
|
+
from ..middleware import BlockMiddleware
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class SimplifyDoiInEntry(BlockMiddleware):
    """Simplify doi by delete `https://doi.org/` if existed.

    Both the `http` and the `https` form of the prefix are removed.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "doi" in entry:
            # `s?` (not `s*`) and an escaped dot, so only the literal resolver
            # prefix `http(s)://doi.org/` is stripped.
            entry["doi"] = re.sub(r"https?://doi\.org/", "", entry["doi"])
        return entry
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ChooseDoiOrUrlInEntry(BlockMiddleware):
    """Choose doi when an item has both a doi and a url.

    The `url` field is deleted when the entry also has a non-empty `doi` field.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        has_doi = "doi" in entry and len(entry["doi"]) != 0
        if has_doi and "url" in entry:
            del entry["url"]
        return entry
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ChangeDoiToUrlInEntry(BlockMiddleware):
    """Change doi to url by add `https://doi.org/` if not existed, and then delete doi.

    An empty doi is deleted without writing a url.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "doi" not in entry:
            return entry

        doi = entry["doi"]
        if len(doi) != 0:
            # A doi that already starts with a scheme is used as the url as-is
            already_url = re.match(r"https*://", doi)
            entry["url"] = doi if already_url else f"https://doi.org/{doi}"

        del entry["doi"]
        return entry
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class HttpsUrlInEntry(BlockMiddleware):
    """Change http to https for security.

    Every `http://` (or `https://`) occurrence in the `url` field is rewritten to `https://`.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "url" not in entry:
            return entry

        current_url = entry["url"]
        entry["url"] = re.sub(r"https*://", "https://", current_url)
        return entry
|