pyeasyphd 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +0 -0
- pyeasyphd/bib/__init__.py +1 -0
- pyeasyphd/bib/bibtexbase/__init__.py +7 -0
- pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
- pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
- pyeasyphd/bib/bibtexparser/__init__.py +47 -0
- pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
- pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
- pyeasyphd/bib/bibtexparser/library.py +207 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
- pyeasyphd/bib/bibtexparser/model.py +481 -0
- pyeasyphd/bib/bibtexparser/splitter.py +151 -0
- pyeasyphd/bib/core/__init__.py +18 -0
- pyeasyphd/bib/core/convert_library_to_library.py +31 -0
- pyeasyphd/bib/core/convert_library_to_str.py +199 -0
- pyeasyphd/bib/core/convert_str_to_library.py +34 -0
- pyeasyphd/bib/core/convert_str_to_str.py +27 -0
- pyeasyphd/main/__init__.py +17 -0
- pyeasyphd/main/basic_input.py +149 -0
- pyeasyphd/main/pandoc_md_to.py +361 -0
- pyeasyphd/main/python_run_bib.py +73 -0
- pyeasyphd/main/python_run_md.py +235 -0
- pyeasyphd/main/python_run_tex.py +149 -0
- pyeasyphd/main/python_writers.py +212 -0
- pyeasyphd/pyeasyphd.py +72 -0
- pyeasyphd/pyeasyphd.sublime-settings +235 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/tools/__init__.py +30 -0
- pyeasyphd/tools/compare/compare_bibs.py +234 -0
- pyeasyphd/tools/experiments_base.py +203 -0
- pyeasyphd/tools/format_save_bibs.py +178 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
- pyeasyphd/tools/generate/generate_links.py +356 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
- pyeasyphd/tools/replace/replace.py +81 -0
- pyeasyphd/tools/search/data.py +318 -0
- pyeasyphd/tools/search/search_base.py +118 -0
- pyeasyphd/tools/search/search_core.py +326 -0
- pyeasyphd/tools/search/search_keywords.py +227 -0
- pyeasyphd/tools/search/search_writers.py +288 -0
- pyeasyphd/tools/search/utils.py +152 -0
- pyeasyphd/tools/spider/process_spider_bib.py +247 -0
- pyeasyphd/tools/spider/process_spider_url.py +74 -0
- pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
- pyeasyphd/utils/utils.py +62 -0
- pyeasyphd-0.0.2.dist-info/METADATA +27 -0
- pyeasyphd-0.0.2.dist-info/RECORD +80 -0
- pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.8
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"caption": "Preferences",
|
|
4
|
+
"id": "preferences",
|
|
5
|
+
"children":
|
|
6
|
+
[
|
|
7
|
+
{
|
|
8
|
+
"caption": "Package Settings",
|
|
9
|
+
"id": "package-settings",
|
|
10
|
+
"children":
|
|
11
|
+
[
|
|
12
|
+
{
|
|
13
|
+
"caption": "PyEasyPhD",
|
|
14
|
+
"children":
|
|
15
|
+
[
|
|
16
|
+
{
|
|
17
|
+
"command": "open_url",
|
|
18
|
+
"args": {"url": "https://github.com/NextArtifIntell/pyeasyphd"},
|
|
19
|
+
"caption": "Documentation"
|
|
20
|
+
},
|
|
21
|
+
{ "caption": "-" },
|
|
22
|
+
{
|
|
23
|
+
"command": "open_file",
|
|
24
|
+
"args": {
|
|
25
|
+
"file": "${packages}/pyeasyphd/pyeasyphd.sublime-settings"
|
|
26
|
+
},
|
|
27
|
+
"caption": "Settings – Default (read-only)"
|
|
28
|
+
},
|
|
29
|
+
{
|
|
30
|
+
"command": "open_file",
|
|
31
|
+
"args": {
|
|
32
|
+
"file": "${packages}/User/PyEasyPhD.sublime-settings"
|
|
33
|
+
},
|
|
34
|
+
"caption": "Settings – User"
|
|
35
|
+
},
|
|
36
|
+
{ "caption": "-" }
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|
|
41
|
+
]
|
|
42
|
+
}
|
|
43
|
+
]
|
pyeasyphd/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Initialization."""
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def split_data_list(split_pattern: str, data_list: List[str], last_next: str = "next") -> List[str]:
    r"""Split every string in *data_list* by *split_pattern*, keeping separators.

    The pattern must contain capturing parentheses (e.g. ``(\n)``) so that
    :func:`re.split` also returns the matched separators.

    Args:
        split_pattern (str): Regex with a capturing group, e.g. ``r"(\n)"``.
        data_list (List[str]): Lines to split.
        last_next (str): ``"next"`` glues each separator onto the part that
            follows it; ``"last"`` glues it onto the part before it.  Any
            other value drops the line (original behavior preserved).

    Returns:
        List[str]: Re-joined pieces with blank pieces removed.

    Examples:
        split_pattern = r"(\n)", last_next = "next" or "last".
    """
    result: List[str] = []
    for entry in data_list:
        pieces = re.split(split_pattern, entry)
        # Even indices are text parts, odd indices are captured separators.
        parts, seps = pieces[::2], pieces[1::2]

        if last_next == "next":
            padded = [""] + seps
            result.extend(sep + part for sep, part in zip(padded, parts))
        elif last_next == "last":
            padded = seps + [""]
            result.extend(part + sep for part, sep in zip(parts, padded))

    return [piece for piece in result if piece.strip()]
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Regex alternatives marking the start of a field value: quote, brace, or a
# bare word (an abbreviation such as `journal = ECJ`).
FIELD_FORMAT_FLAG = [r'"', r"{", r"\w"]

# Whitelist of recognized BibTeX field names (all lower-case).
# BUG FIX: a missing comma after "shortjournal" previously caused implicit
# string concatenation, yielding the bogus entry "shortjournalshorttitle"
# and silently dropping both "shortjournal" and "shorttitle".
DEFAULT_FIELDS_LIST = [
    "abstract",
    "accepted",
    "accessdate",
    "address",
    "affiliation",
    "affiliations",
    "annotation",
    "archive",
    "article-number",
    "articleno",
    "audio",
    "author",
    "author-email",
    "authors",
    "bibsource",
    "biburl",
    "book-group-author",
    "booktitle",
    "cited-references",
    "code",
    "copyright",
    "crossref",
    "data",
    "date",
    "doc-delivery-number",
    "doi",
    "edition",
    "editor",
    "editors",
    "eissn",
    "eprint",
    "extra",
    "fjournal",
    "funding-acknowledgement",
    "funding-text",
    "howpublished",
    "institution",
    "isbn",
    "isbn-13",
    "issn",
    "issue",
    "issue_date",
    "journal",
    "journal-iso",
    "journalabbr",
    "journalabbreviation",
    "key",
    "keywords",
    "keywords-plus",
    "language",
    "lccn",
    "location",
    "month",
    "note",
    "number",
    "number-of-cited-references",
    "numpages",
    "orcid-numbers",
    "organization",
    "pages",
    "pdf",
    "place",
    "pubdates",
    "publisher",
    "remark",
    "research-areas",
    "researcherid-numbers",
    "rights",
    "school",
    "series",
    "shortjournal",
    "shorttitle",
    "slide",
    "submitted",
    "summary",
    "supplementary",
    "times-cited",
    "timestamp",
    "title",
    "type",
    "unique-id",
    "url",
    "usage-count-last-180-days",
    "usage-count-since-2013",
    "video",
    "volume",
    "web-of-science-categories",
    "web-of-science-index",
    "xxeditor",
    "xxnote",
    "xxtitle",
    "xxyear",
    "year",
]
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from ._base import split_data_list
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SplitBibAccordingToMark(object):
    """Split raw bib lines so that each ``@type{`` marker starts a new line."""

    def __init__(self) -> None:
        super().__init__()

    def split_marks(self, data_list: List[str]) -> List[str]:
        # Glue each "@type{" marker onto the text that follows it ("next" mode).
        entry_mark_pattern = r"(@[a-zA-Z]+{)"
        return split_data_list(entry_mark_pattern, data_list, "next")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ObtainMarkBlocksDict(object):
    """Group consecutive lines into blocks keyed by their ``@mark`` type."""

    def __init__(self) -> None:
        pass

    def obtain_dict(
        self, data_list: List[str], is_lower_mark: bool = True
    ) -> Tuple[Dict[str, List[List[str]]], List[List[str]]]:
        r"""Generate blocks.

        Args:
            data_list (List[str]): data list.
            is_lower_mark (bool): lower-case the mark used as the dict key.

        Returns:
            Tuple[Dict[str, List[List[str]]], List[List[str]]]: mapping of
            mark -> list of blocks, plus implicit comments (lines that do not
            belong to any ``@mark{`` block) tagged with this class's name.
        """
        mark_regex = re.compile(r"@([a-zA-Z]+){")
        blocks_by_mark: Dict[str, List[List[str]]] = {}
        implicit_comments: List[List[str]] = []

        idx, total = 0, len(data_list)
        while idx < total:
            current = data_list[idx]
            idx += 1
            matched = mark_regex.match(current)
            if not matched:
                # Anything outside a recognized block is an implicit comment.
                implicit_comments.append([current, __class__.__name__])
                continue

            collected = [current]
            # Consume lines until the next "@mark{" begins a new block.
            while idx < total and not mark_regex.match(data_list[idx]):
                collected.append(data_list[idx])
                idx += 1

            key = matched.group(1).lower() if is_lower_mark else matched.group(1)
            blocks_by_mark.setdefault(key, []).append(collected)

        return blocks_by_mark, implicit_comments
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Module is import-only; no CLI entry point is provided.
if __name__ == "__main__":
    pass
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class StandardizeCommentBlock(object):
    """Standardize a ``@comment{...}`` block into one normalized line."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a comment block.

        Args:
            block (List[str]): raw lines of the block.

        Returns:
            Tuple[List[str], List[List[str]]]: normalized block ([] when the
            content is unparseable) and implicit comments ``[text, reason]``.
        """
        implicit_comments: List[List[str]] = []
        original = "".join(block)

        regex_comment = re.compile(r"@comment{" + r"(.*)", re.DOTALL)
        if mch := regex_comment.match(original):
            a = mch.group(1).strip()
            # Balance braces; "@comment{" itself counts as one opening brace.
            if (ll := (a.count("{") + 1)) > (lr := a.count("}")):
                a += "}" * (ll - lr)
            elif ll < lr:
                a = "{" * (lr - ll) + a

            if sub_mch := re.match(r"(.*)" + "}" + r"(.*)(\n*)", a):
                block = []

                sub_a, sub_b, sub_c = sub_mch.groups()
                if sub_a.strip():
                    block = ["@comment{" + sub_a.replace("\n", " ").strip() + "}\n"]

                if sub_b.strip():
                    # Leftover text after the closing brace becomes a comment.
                    implicit_comments = [[sub_b + sub_c, __class__.__name__]]
            else:
                # BUG FIX: record the original content; previously the block
                # was cleared first, so "".join(block) recorded "".
                block = []
                implicit_comments = [[original, __class__.__name__]]
        else:
            # BUG FIX: same as above — keep the original text, not "".
            block = []
            implicit_comments = [[original, __class__.__name__]]
        return block, implicit_comments
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Dict, List, Tuple
|
|
3
|
+
|
|
4
|
+
from ._base import split_data_list
|
|
5
|
+
from .default_data import DEFAULT_FIELDS_LIST, FIELD_FORMAT_FLAG
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class StandardizeEntryBlock(object):
    """Standardize entry block.

    Orchestrates the helpers in this module: detect the delimiter style,
    collect field names, split/append/extract the field lines, and finally
    sanity-check the result.  Content that cannot be standardized is
    returned as implicit comments ``[text, reason]``.

    Args:
        default_additional_field_list (List[str] | None): Additional default
            fields merged (lower-cased) into ``DEFAULT_FIELDS_LIST``.

    Attributes:
        default_fields_list (List[str]): Default fields.
    """

    def __init__(self, default_additional_field_list: List[str] = None) -> None:
        # BUG FIX: use a None sentinel instead of a mutable default argument.
        if default_additional_field_list is None:
            default_additional_field_list = []
        default_fields_old = [d.lower().strip() for d in DEFAULT_FIELDS_LIST]
        default_fields_new = [d.lower().strip() for d in default_additional_field_list]
        self.default_fields_list = list(set(default_fields_old).union(set(default_fields_new)))

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Standardize one entry block.

        Args:
            block (List[str]): raw block lines.

        Returns:
            Tuple[List[str], List[List[str]]]: standardized block ([] on
            failure) and implicit comments describing dropped content.
        """
        # Obtain braces or quotes
        implicit_comments = []
        pre, post = EntryBase().obtain_braces_or_quotes(block)
        if (len(pre) == 0) or (len(post) == 0):
            message = f"Obtain braces or quotes: No standard `pre - {pre}` and `post - {post}`"
            implicit_comments.append(["".join(block), message])
            return [], implicit_comments

        # Obtain fields (supports abbreviations)
        field_list = EntryBase().obtain_fields(block, self.default_fields_list)
        if len(field_list) == 0:
            message = "`Obtain fields`: No fields found"
            implicit_comments.append(["".join(block), message])
            return [], implicit_comments

        # Split according to the field pattern.
        # Does NOT support abbreviations: ['year = {2019}, journal = ECJ,']
        # TODO Support abbreviations
        pattern = r"\b((?:{})\s*=\s*(?:{}))".format("|".join(field_list), "|".join([rf"{pre}"]))  # compulsory
        block = SplitEntry().split_fields(pattern, block)

        # Append continuation lines to their field line (supports abbreviations).
        block = AppendEntry().append_field(field_list, (pre, post), block)

        # Extract the field lines; anything left over is redundant content.
        block, redundant_list = ExtractEntry().extract(field_list, (pre, post), block)
        if len(redundant_list) != 0:
            message = "`Extract`: Redundant content"
            implicit_comments.append(["".join(redundant_list), message])

        # Final sanity check (supports abbreviations).
        error_dict, block, is_standard_bib_flag = CheckEntry().check(field_list, (pre, post), block)
        if len(error_dict) != 0:
            for key in error_dict:
                implicit_comments.append(["".join(error_dict[key]), f"`Check`: {key}"])

        if not is_standard_bib_flag:
            implicit_comments.append(["".join(block), "`Check`: Not standard bib"])
            return [], implicit_comments

        return block, implicit_comments
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class EntryBase(object):
    """Shared helpers for inspecting entry blocks."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def obtain_braces_or_quotes(block: List[str]) -> Tuple[str, str]:
        """Detect whether field values are wrapped in braces or quotes.

        Args:
            block (List[str]): block.

        Returns:
            Tuple[str, str]: ``("{", "}")`` or ``('"', '"')``, or ``("", "")``
            when no probe field is found or the styles are inconsistent.
        """
        text = "".join(block)
        # Probe a handful of common fields; trailing "s*" also matches
        # plural variants (authors, pages, urls, ...), case-insensitively.
        probe_fields = ("title", "author", "year", "page", "url")
        found = []
        for name in probe_fields:
            regex = re.compile(rf'\b{name}s*\s*=\s*([{{"])', flags=re.I)
            styles = sorted(set(regex.findall(text)))
            if styles:
                found.append(styles)

        lengths = [len(styles) for styles in found]
        mixed_across_fields = any(styles != found[0] for styles in found)
        # No probes matched, one field uses both styles, or fields disagree.
        if (not found) or (2 in lengths) or mixed_across_fields:
            return "", ""

        return ("{", "}") if found[0][0] == "{" else ('"', '"')

    def obtain_fields(
        self,
        block: List[str],
        default_fields_list: List[str],
        field_pattern: str = r'[\w\-]+'
    ) -> List[str]:
        r"""Collect recognized field names present in the block.

        Args:
            block (List[str]): block.
            default_fields_list (List[str]): allowed lower-case field names.
            field_pattern (str = r'[\w\-]+'): field pattern.

        Returns:
            List[str]: sorted field names found in the block.
        """
        value_start = "|".join(FIELD_FORMAT_FLAG)  # supports abbreviations
        regex = re.compile(rf'({field_pattern})\s*=\s*(?:{value_start})')
        candidates = set(regex.findall("".join(block)))
        return sorted(name for name in candidates if name.lower() in default_fields_list)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class SplitEntry(object):
    """Thin wrapper that splits entry lines on a field pattern."""

    def __init__(self) -> None:
        super().__init__()

    def split_fields(self, field_pattern: str, block: List[str], last_next: str = "next") -> List[str]:
        # Delegate to the shared splitter; the pattern must carry a capture group.
        return split_data_list(field_pattern, block, last_next)
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
class AppendEntry(object):
    """Append Patch Bib.

    Merges continuation lines of a multi-line field value back onto the
    line where the field starts, so that each field occupies one line.
    """

    def __init__(self) -> None:
        pass

    @staticmethod
    def append_field(field_list: List[str], braces_or_quotes: Tuple[str, str], block: List[str]) -> List[str]:
        """Append continuation lines onto their field (or entry-head) line.

        A line "starts" a unit when it matches a field assignment
        (``field = {``/``"``), an abbreviation assignment (``journal = EJC``),
        or an entry head (``@type{``).  Every following line that starts no
        unit of its own is glued onto the current unit's line.

        Args:
            field_list (List[str]): Append field list.
            braces_or_quotes (Tuple[str, str]): Brace or quote pair; only the
                opening delimiter is used here.
            block (List[str]): Block lines to merge.

        Returns:
            List[str]: new patch bib after appending.
        """
        pre, _ = braces_or_quotes

        # "[%\s]*" tolerates leading whitespace and commented-out fields.
        temp = rf'[%\s]*(?:{"|".join(field_list)})'
        regex_field = re.compile(rf"{temp}\s*=\s*{pre}", flags=re.I)
        regex_field_abbr = re.compile(rf"{temp}\s*=\s*\w+[\w\-]*", flags=re.I)  # journal = EJC,
        regex_termination = re.compile(r"\s*@[a-zA-Z]*{", flags=re.I)

        # strip and append
        line_index, len_data, new_block = 0, len(block), []
        while line_index < len_data:
            line = block[line_index]
            line_index += 1
            if regex_field.match(line) or regex_termination.match(line) or regex_field_abbr.match(line):
                new_line = line
                # Absorb every following line until the next unit starts.
                while line_index < len_data:
                    line = block[line_index]
                    if regex_field.match(line) or regex_termination.match(line) or regex_field_abbr.match(line):
                        break
                    else:
                        # Blank continuation lines are consumed but add nothing.
                        if line.lstrip():
                            new_line = new_line.rstrip() + " " + line.lstrip()  # append
                        line_index += 1
                new_block.append(new_line)
            else:
                # A line before any unit starts is kept unchanged.
                new_block.append(line)
        return new_block
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class ExtractEntry(object):
    """Extract well-formed field lines from a merged block."""

    def __init__(self) -> None:
        pass

    def extract(
        self, field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
    ) -> Tuple[List[str], List[str]]:
        """Extract.

        Each line is classified (entry head, abbreviation field, complete
        field, unterminated field, lone closing brace); anything trailing a
        recognized prefix is collected as redundant content.  A line matching
        no pattern aborts the whole extraction.

        Args:
            field_list (List[str]): field list
            brace_or_quote (Tuple[str, str]): (", ") or ({, })
            block (List[str]): the block

        Return:
            Tuple[List[str], List[str]]: main block, redundant part.
            On an unclassifiable line the original block is returned
            unchanged as the redundant part.
        """
        pre, post = brace_or_quote

        temp = rf'[%\s]*(?:{"|".join(field_list)})'
        # Field opened but not closed on this line.
        regex_field_two = re.compile(rf"({temp}\s*=\s*{pre})(.*)(\n*)", flags=re.I)
        # Field opened and closed on this line.
        regex_field_one = re.compile(rf"({temp}\s*=\s*{pre}.*{post})(.*)(\n*)", flags=re.I)
        # Abbreviation value, e.g. "journal = EJC".
        regex_field_abbr = re.compile(rf"({temp}\s*=\s*\w+[\w\-]*)(.*)(\n*)", flags=re.I)
        # Entry head, e.g. "@article{some-key".
        regex_termination = re.compile(r"(\s*@[a-zA-Z]*{\s*[\w\-:/\\.\']*)(.*)(\n*)", flags=re.I)

        main_list, redundant_list = [], []

        for line in block:
            new_line, redundant = "", ""
            if mch := regex_termination.match(line):
                one, two, three = mch.groups()
                new_line = one + ",\n"
                # Trailing text that is more than separators is redundant.
                if re.sub(r"[\s,\n\}]+", "", two):
                    redundant = two + three

            elif mch := regex_field_abbr.match(line):
                one, two, three = mch.groups()
                new_line = one + ",\n"
                if re.sub(r"[\s,\n\}]+", "", two):
                    redundant = two + three

            elif mch := regex_field_one.match(line):
                one, two, three = mch.groups()
                new_line = self._resub_brace_or_quote(pre, post, one + ",\n")
                if re.sub(r"[\s,\n\}]+", "", two):
                    redundant = two + three

            elif mch := regex_field_two.match(line):
                one, two, three = mch.groups()
                # Close the unterminated value with the missing delimiter.
                new_line = self._resub_brace_or_quote(pre, post, one + two.strip() + post + ",\n")

            elif line.strip() == "}":
                pass

            else:
                # Unclassifiable line: give the whole block back as redundant.
                return [], block

            if new_line:
                main_list.append(new_line)
            if redundant:
                redundant_list.append(redundant)

        # for enclosing "@[a-zA-Z]{"
        if main_list:
            main_list.append("}\n")
        return main_list, redundant_list

    def _resub_brace_or_quote(self, pre, post, line: str) -> str:
        # Collapse a trailing run of closers/separators to a single "},\n"
        # (or '",\n'), then re-balance the delimiters.
        if post == "}":
            if line.count(post) > line.count(pre):
                line = re.sub(r'(}[}\s\n,]*)$', '},\n', line)
            line = add_brace_or_quote(pre, post, line)

        elif post == '"':
            if line.count(post) > line.count(pre):
                line = re.sub(r'("["\s\n,]*)$', '",\n', line)
            line = add_brace_or_quote(pre, post, line)
        return line
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def add_brace_or_quote(pre: str, post: str, line: str) -> str:
    """Balance *pre*/*post* delimiter counts in *line*.

    Missing closing delimiters are inserted right after the last existing
    closing delimiter; missing opening delimiters right before the first
    existing opening delimiter (same positions as the original list-reverse
    implementation).

    Args:
        pre (str): opening delimiter, ``"{"`` or ``'"'``.
        post (str): closing delimiter, ``"}"`` or ``'"'``.
        line (str): a single field line.

    Returns:
        str: line with equal counts of *pre* and *post*.
    """
    cpre, cpost = line.count(pre), line.count(post)
    if cpre == cpost:
        return line

    if cpre > cpost:
        missing = post * (cpre - cpost)
        if post in line:
            insert_at = line.rindex(post) + 1
        else:
            # BUG FIX: the original did line_list.index(post) and raised
            # ValueError when no closer existed at all; fall back to
            # inserting before any trailing whitespace.
            insert_at = len(line.rstrip())
        return line[:insert_at] + missing + line[insert_at:]

    missing = pre * (cpost - cpre)
    # BUG FIX: likewise guard line.index(pre) when no opener exists.
    insert_at = line.index(pre) if pre in line else 0
    return line[:insert_at] + missing + line[insert_at:]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
class CheckEntry(object):
    """Final sanity check of a standardized entry block."""

    @staticmethod
    def check(
        field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
    ) -> Tuple[Dict[str, List[str]], List[str], bool]:
        """Check that the block is a minimal well-formed entry.

        Args:
            field_list (List[str]): recognized field names.
            brace_or_quote (Tuple[str, str]): ("{", "}") or ('"', '"').
            block (List[str]): standardized block lines.

        Returns:
            Tuple[Dict[str, List[str]], List[str], bool]: error lines keyed
            by reason, the cleaned block, and whether the minimal conditions
            (exactly one entry head and one closing brace) are met.
        """
        pre, post = brace_or_quote

        regex_entry = re.compile(r"\s*@[a-zA-Z]+{")
        regex_field = re.compile(fr'\s*(?:{"|".join(field_list)})' + r"\s*=")
        entry_flag, brace_flag = False, False  # minimal conditions
        error_dict: Dict[str, List[str]] = {}
        new_block = []
        for line in block:
            if regex_entry.match(line) and (not entry_flag):  # must occur exactly once per entry
                # Head line must contain exactly one "{" and one ",".
                if (line.count("{") != 1) or (line.count(",") != 1):
                    error_dict.setdefault("Failed entry_type", []).append(line)
                else:
                    entry_flag = True
                    new_block.append(line)

            elif regex_field.match(line):
                # Re-balance delimiters on the field line before keeping it.
                new_block.append(add_brace_or_quote(pre, post, line))

            elif (line.strip() == "}") and (not brace_flag):  # must occur exactly once per entry
                brace_flag = True
                new_block.append(line)

            else:
                # BUG FIX: removed stray trailing backtick from the error key
                # (was "Redundant content`").
                error_dict.setdefault("Redundant content", []).append(line)
        return error_dict, new_block, entry_flag and brace_flag
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# Module is import-only; no CLI entry point is provided.
if __name__ == "__main__":
    pass
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class StandardizePreambleBlock(object):
    """Standardize a ``@preamble{ "..." }`` block into one normalized line."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a preamble block.

        Example input: ``@preamble{ "\\providecommand{\\noopsort}[1]{} " }``

        Args:
            block (List[str]): raw lines of the block.

        Returns:
            Tuple[List[str], List[List[str]]]: normalized block ([] when
            unparseable) and implicit comments ``[text, reason]``.
        """
        implicit_comments: List[List[str]] = []
        original = "".join(block)

        regex_preamble = re.compile(
            r"@preamble{" + r'\s*(")' + r"([\w\-\\\[\]\{\}\s]+)" + r'(")\s*' + r"(.*)(\n*)", re.DOTALL
        )
        mch = regex_preamble.match(original)
        if mch:
            a, b, c, d, e = mch.groups()
            if (a == '"') and (c == '"'):
                block = ["@preamble{ " + a + b.replace("\n", " ").strip() + c + " }\n"]

                # Drop the closing brace of the @preamble itself.
                if d and d.lstrip()[0] == "}":
                    d = d.lstrip()[1:].lstrip()

                if d.strip():
                    implicit_comments = [[d + e, __class__.__name__]]
            else:
                # BUG FIX: record the original content; previously the block
                # was cleared first, so "".join(block) recorded "".
                block = []
                implicit_comments = [[original, __class__.__name__]]
        else:
            # BUG FIX: same as above — keep the original text, not "".
            block = []
            implicit_comments = [[original, __class__.__name__]]
        return block, implicit_comments
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class StandardizeStringBlock(object):
    """Standardize a ``@string{name = "value"}`` block into one line."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a string-abbreviation block.

        Args:
            block (List[str]): raw lines of the block.

        Returns:
            Tuple[List[str], List[List[str]]]: normalized block ([] when
            unparseable) and implicit comments ``[text, reason]``.
        """
        implicit_comments: List[List[str]] = []
        original = "".join(block)

        regex = re.compile(
            r"@string{" + r"\s*([\w]+)\s*=\s*" + r'(["{])' + r"([\w\-\n]+)" + r'(["}])' + r'(.*)(\n*)', re.DOTALL
        )
        if mch := regex.match(original):
            a, b, c, d, e, f = mch.groups()
            # The value delimiters must be a matched pair: "..." or {...}.
            if ((b == '"') and (d == '"')) or ((b == '{') and (d == '}')):
                block = ["@string{" + a + " = " + b + c.replace("\n", " ").strip() + d + "}\n"]

                # Drop the closing brace of the @string itself.
                if e and e.lstrip()[0] == "}":
                    e = e.lstrip()[1:].lstrip()

                if e.strip():
                    implicit_comments = [[e + f, __class__.__name__]]
            else:
                # BUG FIX: record the original content; previously the block
                # was cleared first, so "".join(block) recorded "".
                block = []
                implicit_comments = [[original, __class__.__name__]]
        else:
            # BUG FIX: same as above — keep the original text, not "".
            block = []
            implicit_comments = [[original, __class__.__name__]]
        return block, implicit_comments
|