pyeasyphd-0.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyeasyphd might be problematic.

Files changed (80):
  1. pyeasyphd-0.0.2/PKG-INFO +27 -0
  2. pyeasyphd-0.0.2/README.md +1 -0
  3. pyeasyphd-0.0.2/pyeasyphd/.python-version +1 -0
  4. pyeasyphd-0.0.2/pyeasyphd/Main.sublime-menu +43 -0
  5. pyeasyphd-0.0.2/pyeasyphd/__init__.py +0 -0
  6. pyeasyphd-0.0.2/pyeasyphd/bib/__init__.py +1 -0
  7. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/__init__.py +7 -0
  8. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
  9. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
  10. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
  11. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
  12. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
  13. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
  14. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
  15. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
  16. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/__init__.py +47 -0
  17. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
  18. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
  19. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/library.py +207 -0
  20. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
  21. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
  22. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
  23. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
  24. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
  25. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
  26. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
  27. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
  28. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
  29. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
  30. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
  31. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
  32. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
  33. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
  34. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
  35. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
  36. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
  37. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
  38. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
  39. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
  40. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
  41. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
  42. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
  43. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
  44. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
  45. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/model.py +481 -0
  46. pyeasyphd-0.0.2/pyeasyphd/bib/bibtexparser/splitter.py +151 -0
  47. pyeasyphd-0.0.2/pyeasyphd/bib/core/__init__.py +18 -0
  48. pyeasyphd-0.0.2/pyeasyphd/bib/core/convert_library_to_library.py +31 -0
  49. pyeasyphd-0.0.2/pyeasyphd/bib/core/convert_library_to_str.py +199 -0
  50. pyeasyphd-0.0.2/pyeasyphd/bib/core/convert_str_to_library.py +34 -0
  51. pyeasyphd-0.0.2/pyeasyphd/bib/core/convert_str_to_str.py +27 -0
  52. pyeasyphd-0.0.2/pyeasyphd/main/__init__.py +17 -0
  53. pyeasyphd-0.0.2/pyeasyphd/main/basic_input.py +149 -0
  54. pyeasyphd-0.0.2/pyeasyphd/main/pandoc_md_to.py +361 -0
  55. pyeasyphd-0.0.2/pyeasyphd/main/python_run_bib.py +73 -0
  56. pyeasyphd-0.0.2/pyeasyphd/main/python_run_md.py +235 -0
  57. pyeasyphd-0.0.2/pyeasyphd/main/python_run_tex.py +149 -0
  58. pyeasyphd-0.0.2/pyeasyphd/main/python_writers.py +212 -0
  59. pyeasyphd-0.0.2/pyeasyphd/pyeasyphd.py +72 -0
  60. pyeasyphd-0.0.2/pyeasyphd/pyeasyphd.sublime-settings +235 -0
  61. pyeasyphd-0.0.2/pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  62. pyeasyphd-0.0.2/pyeasyphd/tools/__init__.py +30 -0
  63. pyeasyphd-0.0.2/pyeasyphd/tools/compare/compare_bibs.py +234 -0
  64. pyeasyphd-0.0.2/pyeasyphd/tools/experiments_base.py +203 -0
  65. pyeasyphd-0.0.2/pyeasyphd/tools/format_save_bibs.py +178 -0
  66. pyeasyphd-0.0.2/pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
  67. pyeasyphd-0.0.2/pyeasyphd/tools/generate/generate_links.py +356 -0
  68. pyeasyphd-0.0.2/pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
  69. pyeasyphd-0.0.2/pyeasyphd/tools/replace/replace.py +81 -0
  70. pyeasyphd-0.0.2/pyeasyphd/tools/search/data.py +318 -0
  71. pyeasyphd-0.0.2/pyeasyphd/tools/search/search_base.py +118 -0
  72. pyeasyphd-0.0.2/pyeasyphd/tools/search/search_core.py +326 -0
  73. pyeasyphd-0.0.2/pyeasyphd/tools/search/search_keywords.py +227 -0
  74. pyeasyphd-0.0.2/pyeasyphd/tools/search/search_writers.py +288 -0
  75. pyeasyphd-0.0.2/pyeasyphd/tools/search/utils.py +152 -0
  76. pyeasyphd-0.0.2/pyeasyphd/tools/spider/process_spider_bib.py +247 -0
  77. pyeasyphd-0.0.2/pyeasyphd/tools/spider/process_spider_url.py +74 -0
  78. pyeasyphd-0.0.2/pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
  79. pyeasyphd-0.0.2/pyeasyphd/utils/utils.py +62 -0
  80. pyeasyphd-0.0.2/pyproject.toml +62 -0
pyeasyphd-0.0.2/PKG-INFO
@@ -0,0 +1,27 @@
+ Metadata-Version: 2.3
+ Name: pyeasyphd
+ Version: 0.0.2
+ Summary: pyeasyphd
+ License: GPL-3.0-or-later
+ Keywords: Python,Markdown,LaTex
+ Author: NextAI
+ Author-email: nextartifintell@gmail.com
+ Maintainer: NextAI
+ Maintainer-email: nextartifintell@gmail.com
+ Requires-Python: >=3.8.1
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Dist: pyadvtools (>=0.0.3,<0.0.4)
+ Project-URL: Documentation, https://github.com/NextArtifIntell/pyeasyphd
+ Project-URL: Homepage, https://github.com/NextArtifIntell/pyeasyphd
+ Project-URL: Repository, https://github.com/NextArtifIntell/pyeasyphd
+ Description-Content-Type: text/markdown
+
+ # pyeasyphd
+
pyeasyphd-0.0.2/README.md
@@ -0,0 +1 @@
+ # pyeasyphd
pyeasyphd-0.0.2/pyeasyphd/.python-version
@@ -0,0 +1 @@
+ 3.8
pyeasyphd-0.0.2/pyeasyphd/Main.sublime-menu
@@ -0,0 +1,43 @@
+ [
+     {
+         "caption": "Preferences",
+         "id": "preferences",
+         "children":
+         [
+             {
+                 "caption": "Package Settings",
+                 "id": "package-settings",
+                 "children":
+                 [
+                     {
+                         "caption": "PyEasyPhD",
+                         "children":
+                         [
+                             {
+                                 "command": "open_url",
+                                 "args": {"url": "https://github.com/NextArtifIntell/pyeasyphd"},
+                                 "caption": "Documentation"
+                             },
+                             { "caption": "-" },
+                             {
+                                 "command": "open_file",
+                                 "args": {
+                                     "file": "${packages}/pyeasyphd/pyeasyphd.sublime-settings"
+                                 },
+                                 "caption": "Settings – Default (read-only)"
+                             },
+                             {
+                                 "command": "open_file",
+                                 "args": {
+                                     "file": "${packages}/User/PyEasyPhD.sublime-settings"
+                                 },
+                                 "caption": "Settings – User"
+                             },
+                             { "caption": "-" }
+                         ]
+                     }
+                 ]
+             }
+         ]
+     }
+ ]
pyeasyphd-0.0.2/pyeasyphd/__init__.py
File without changes
pyeasyphd-0.0.2/pyeasyphd/bib/__init__.py
@@ -0,0 +1 @@
+ """Initialization."""
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/__init__.py
@@ -0,0 +1,7 @@
+ """Initialization."""
+
+ __all__ = [
+     "StandardizeBib"
+ ]
+
+ from .standardize_bib import StandardizeBib
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/_base.py
@@ -0,0 +1,36 @@
+ import re
+ from typing import List
+
+
+ def split_data_list(split_pattern: str, data_list: List[str], last_next: str = "next") -> List[str]:
+     r"""Split data list according to the split pattern.
+
+     The capturing parentheses must be used in the pattern, such as `(\n)`.
+
+     Args:
+         split_pattern (str): split pattern.
+         data_list (List[str]): data list.
+         last_next (str): "next" or "last".
+
+     Returns:
+         List[str]: new data list.
+
+     Examples:
+         split_pattern = r"(\n)", last_next = "next" or "last".
+     """
+     new_data_list = []
+     for line in data_list:
+         split_list = re.split(split_pattern, line)
+         list_one = split_list[0:len(split_list):2]
+         list_two = split_list[1:len(split_list):2]
+
+         temp = []
+         if last_next == "next":
+             list_two.insert(0, "")
+             temp = [list_two[i] + list_one[i] for i in range(len(list_one))]
+         if last_next == "last":
+             list_two.append("")
+             temp = [list_one[i] + list_two[i] for i in range(len(list_one))]
+         new_data_list.extend(temp)
+     new_data_list = [line for line in new_data_list if line.strip()]
+     return new_data_list
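For reference, a minimal usage sketch of split_data_list (the import path follows the file layout in this diff; the sample data is hypothetical):

from pyeasyphd.bib.bibtexbase.standardize._base import split_data_list

# One physical string holding two BibTeX fields separated by newlines (hypothetical data).
data = ["title = {A},\nauthor = {B},\n"]

# "next": each captured "\n" is re-attached to the start of the following fragment.
print(split_data_list(r"(\n)", data, "next"))  # ['title = {A},', '\nauthor = {B},']

# "last": each captured "\n" is re-attached to the end of the preceding fragment.
print(split_data_list(r"(\n)", data, "last"))  # ['title = {A},\n', 'author = {B},\n']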
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/default_data.py
@@ -0,0 +1,97 @@
+ FIELD_FORMAT_FLAG = [r'"', r"{", r"\w"]
+
+ DEFAULT_FIELDS_LIST = [
+     "abstract",
+     "accepted",
+     "accessdate",
+     "address",
+     "affiliation",
+     "affiliations",
+     "annotation",
+     "archive",
+     "article-number",
+     "articleno",
+     "audio",
+     "author",
+     "author-email",
+     "authors",
+     "bibsource",
+     "biburl",
+     "book-group-author",
+     "booktitle",
+     "cited-references",
+     "code",
+     "copyright",
+     "crossref",
+     "data",
+     "date",
+     "doc-delivery-number",
+     "doi",
+     "edition",
+     "editor",
+     "editors",
+     "eissn",
+     "eprint",
+     "extra",
+     "fjournal",
+     "funding-acknowledgement",
+     "funding-text",
+     "howpublished",
+     "institution",
+     "isbn",
+     "isbn-13",
+     "issn",
+     "issue",
+     "issue_date",
+     "journal",
+     "journal-iso",
+     "journalabbr",
+     "journalabbreviation",
+     "key",
+     "keywords",
+     "keywords-plus",
+     "language",
+     "lccn",
+     "location",
+     "month",
+     "note",
+     "number",
+     "number-of-cited-references",
+     "numpages",
+     "orcid-numbers",
+     "organization",
+     "pages",
+     "pdf",
+     "place",
+     "pubdates",
+     "publisher",
+     "remark",
+     "research-areas",
+     "researcherid-numbers",
+     "rights",
+     "school",
+     "series",
+     "shortjournal",
+     "shorttitle",
+     "slide",
+     "submitted",
+     "summary",
+     "supplementary",
+     "times-cited",
+     "timestamp",
+     "title",
+     "type",
+     "unique-id",
+     "url",
+     "usage-count-last-180-days",
+     "usage-count-since-2013",
+     "video",
+     "volume",
+     "web-of-science-categories",
+     "web-of-science-index",
+     "xxeditor",
+     "xxnote",
+     "xxtitle",
+     "xxyear",
+     "year",
+ ]
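A minimal sketch of how these two constants are consumed: the flags are alternated into the "value opener" part of the field regex, mirroring EntryBase.obtain_fields further down in this diff (the sample line is hypothetical; the import path follows the package layout shown here):

import re
from pyeasyphd.bib.bibtexbase.standardize.default_data import DEFAULT_FIELDS_LIST, FIELD_FORMAT_FLAG

# Build a field-detection regex whose value may start with a quote, a brace, or a word character.
regex = re.compile(rf'([\w\-]+)\s*=\s*(?:{"|".join(FIELD_FORMAT_FLAG)})')
print(regex.findall('title = {A}, journal = ECJ,'))  # ['title', 'journal']
print("journal" in DEFAULT_FIELDS_LIST)              # True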
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py
@@ -0,0 +1,54 @@
+ import re
+ from typing import Dict, List, Tuple
+
+ from ._base import split_data_list
+
+
+ class SplitBibAccordingToMark(object):
+     def __init__(self) -> None:
+         super().__init__()
+
+     def split_marks(self, data_list: List[str]) -> List[str]:
+         return split_data_list(r"(@[a-zA-Z]+{)", data_list, "next")
+
+
+ class ObtainMarkBlocksDict(object):
+     def __init__(self) -> None:
+         pass
+
+     def obtain_dict(
+         self, data_list: List[str], is_lower_mark: bool = True
+     ) -> Tuple[Dict[str, List[List[str]]], List[List[str]]]:
+         r"""Generate blocks.
+
+         Args:
+             data_list (List[str]): data list.
+
+         Returns:
+             Tuple[Dict[str, List[List[str]]], List[List[str]]]: dict and implicit comments.
+         """
+         regex_mark = re.compile(r"@([a-zA-Z]+){")
+         line_index, len_data, implicit_comment_list = 0, len(data_list), []
+         mark_patch_bib_list_dict: Dict[str, List[List[str]]] = {}
+         while line_index < len_data:
+             line = data_list[line_index]
+             line_index += 1
+             if mch := regex_mark.match(line):
+                 mark = mch.group(1)
+                 temp = [line]
+                 while line_index < len_data:
+                     line = data_list[line_index]
+                     if regex_mark.match(line):
+                         break
+                     temp.append(line)
+                     line_index += 1
+                 if is_lower_mark:
+                     mark = mark.lower()
+                 mark_patch_bib_list_dict.setdefault(mark, []).append(temp)
+             else:
+                 implicit_comment_list.append([line, __class__.__name__])
+         return mark_patch_bib_list_dict, implicit_comment_list
+
+
+ if __name__ == "__main__":
+     pass
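A minimal sketch of how these two classes chain together (import path as laid out in this diff; the raw lines are hypothetical):

from pyeasyphd.bib.bibtexbase.standardize.do_on_bib import (
    ObtainMarkBlocksDict,
    SplitBibAccordingToMark,
)

raw = [
    "stray comment line\n",
    "@Article{key1,\n",
    "  title = {A},\n",
    "}\n",
    "@string{ecj = {Evolutionary Computation}}\n",
]

blocks = SplitBibAccordingToMark().split_marks(raw)              # one fragment starts at each "@<mark>{"
mark_dict, implicit = ObtainMarkBlocksDict().obtain_dict(blocks)
print(sorted(mark_dict))  # ['article', 'string']  (marks are lowercased by default)
print(implicit)           # [['stray comment line\n', 'ObtainMarkBlocksDict']]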
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py
@@ -0,0 +1,38 @@
+ import re
+ from typing import List, Tuple
+
+
+ class StandardizeCommentBlock(object):
+     """Standardize comment block."""
+
+     def __init__(self) -> None:
+         pass
+
+     def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
+         implicit_comments = []
+
+         regex_comment = re.compile(r"@comment{" + r"(.*)", re.DOTALL)
+         if mch := regex_comment.match("".join(block)):
+             a = mch.group(1).strip()
+             if (ll := (a.count("{") + 1)) > (lr := a.count("}")):
+                 a += "}" * (ll - lr)
+             elif ll < lr:
+                 a = "{" * (lr - ll) + a
+
+             if sub_mch := re.match(r"(.*)" + "}" + r"(.*)(\n*)", a):
+                 block = []
+
+                 sub_a, sub_b, sub_c = sub_mch.groups()
+                 if sub_a.strip():
+                     block = ["@comment{" + sub_a.replace("\n", " ").strip() + "}\n"]
+
+                 if sub_b.strip():
+                     implicit_comments = [[sub_b + sub_c, __class__.__name__]]
+
+             else:
+                 implicit_comments = [["".join(block), __class__.__name__]]
+                 block = []
+         else:
+             implicit_comments = [["".join(block), __class__.__name__]]
+             block = []
+         return block, implicit_comments
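A minimal sketch of the comment-block normalization above (hypothetical input; import path as in this diff). The text inside the braces is collapsed onto one line, and anything trailing the closing brace is demoted to an implicit comment:

from pyeasyphd.bib.bibtexbase.standardize.do_on_comment_block import StandardizeCommentBlock

block = ["@comment{kept content} trailing junk\n"]
new_block, implicit = StandardizeCommentBlock().standardize(block)
print(new_block)  # ['@comment{kept content}\n']
print(implicit)   # [[' trailing junk', 'StandardizeCommentBlock']]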
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py
@@ -0,0 +1,310 @@
+ import re
+ from typing import Dict, List, Tuple
+
+ from ._base import split_data_list
+ from .default_data import DEFAULT_FIELDS_LIST, FIELD_FORMAT_FLAG
+
+
+ class StandardizeEntryBlock(object):
+     """Standardize entry block.
+
+     Args:
+         default_additional_field_list (List[str] = []): Additional default fields.
+
+     Attributes:
+         default_fields_list (List[str]): Default fields.
+     """
+
+     def __init__(self, default_additional_field_list: List[str] = []) -> None:
+         default_fields_old = [d.lower().strip() for d in DEFAULT_FIELDS_LIST]
+         default_fields_new = [d.lower().strip() for d in default_additional_field_list]
+         self.default_fields_list = list(set(default_fields_old).union(set(default_fields_new)))
+
+     def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
+         # obtain braces or quotes
+         implicit_comments = []
+         pre, post = EntryBase().obtain_braces_or_quotes(block)
+         if (len(pre) == 0) or (len(post) == 0):
+             message = f"Obtain braces or quotes: No standard `pre - {pre}` and `post - {post}`"
+             implicit_comments.append(["".join(block), message])
+             return [], implicit_comments
+
+         # Obtain fields
+         # Support for abbreviations
+         field_list = EntryBase().obtain_fields(block, self.default_fields_list)
+         if len(field_list) == 0:
+             message = "`Obtain fields`: No fields found"
+             implicit_comments.append(["".join(block), message])
+             return [], implicit_comments
+
+         # Split according to the field pattern
+         # Does not support abbreviations: ['year = {2019}, journal = ECJ,']
+         # TODO Support abbreviations
+         pattern = r"\b((?:{})\s*=\s*(?:{}))".format("|".join(field_list), "|".join([rf"{pre}"]))  # compulsory
+         block = SplitEntry().split_fields(pattern, block)
+
+         # Append according to the field pattern
+         # Support abbreviations
+         block = AppendEntry().append_field(field_list, (pre, post), block)
+
+         # Extract
+         # Support abbreviations
+         block, redundant_list = ExtractEntry().extract(field_list, (pre, post), block)
+         if len(redundant_list) != 0:
+             message = "`Extract`: Redundant content"
+             implicit_comments.append(["".join(redundant_list), message])
+
+         # Check
+         # Support abbreviations
+         error_dict, block, is_standard_bib_flag = CheckEntry().check(field_list, (pre, post), block)
+         if len(error_dict) != 0:
+             for key in error_dict:
+                 implicit_comments.append(["".join(error_dict[key]), f"`Check`: {key}"])
+
+         if not is_standard_bib_flag:
+             implicit_comments.append(["".join(block), "`Check`: Not standard bib"])
+             return [], implicit_comments
+
+         return block, implicit_comments
+
+
+ class EntryBase(object):
+     def __init__(self) -> None:
+         pass
+
+     @staticmethod
+     def obtain_braces_or_quotes(block: List[str]) -> Tuple[str, str]:
+         """Obtain braces or quotes in block.
+
+         Args:
+             block (List[str]): block.
+
+         Returns:
+             Tuple[str, str]: the tuple of braces or quotes.
+         """
+         content = "".join(block)
+         regex_list = [
+             re.compile(r'\btitles*\s*=\s*([{"])', flags=re.I),
+             re.compile(r'\bauthors*\s*=\s*([{"])', flags=re.I),
+             re.compile(r'\byears*\s*=\s*([{"])', flags=re.I),
+             re.compile(r'\bpages*\s*=\s*([{"])', flags=re.I),
+             re.compile(r'\burls*\s*=\s*([{"])', flags=re.I),
+         ]
+         flag_list_list = [sorted(list(set(regex.findall(content)))) for regex in regex_list]
+
+         flag_list_list = [f for f in flag_list_list if len(f) != 0]
+         len_list = [len(f) for f in flag_list_list]
+
+         # 0 or 1 or 2 flags
+         if (len(len_list) == 0) or (2 in len_list) or (not all([f == flag_list_list[0] for f in flag_list_list])):
+             return "", ""
+
+         if flag_list_list[0][0] == "{":
+             return "{", "}"
+         else:
+             return '"', '"'
+
+     def obtain_fields(
+         self,
+         block: List[str],
+         default_fields_list: List[str],
+         field_pattern: str = r'[\w\-]+'
+     ) -> List[str]:
+         r"""Obtain fields in block.
+
+         Args:
+             block (List[str]): block.
+             field_pattern (str = r'[\w\-]+'): field pattern.
+
+         Returns:
+             List[str]: field list.
+         """
+         regex = re.compile(rf'({field_pattern})\s*=\s*(?:{"|".join(FIELD_FORMAT_FLAG)})')  # support for abbreviation
+         obtain_field_list = list(set(regex.findall("".join(block))))
+         obtain_field_list = [field for field in obtain_field_list if field.lower() in default_fields_list]
+         return sorted(obtain_field_list)
+
+
+ class SplitEntry(object):
+     def __init__(self) -> None:
+         super().__init__()
+
+     def split_fields(self, field_pattern: str, block: List[str], last_next: str = "next") -> List[str]:
+         return split_data_list(field_pattern, block, last_next)
+
+
+ class AppendEntry(object):
+     """Append Patch Bib."""
+
+     def __init__(self) -> None:
+         pass
+
+     @staticmethod
+     def append_field(field_list: List[str], braces_or_quotes: Tuple[str, str], block: List[str]) -> List[str]:
+         """Append.
+
+         Args:
+             field_list (List[str]): Append field list.
+             braces_or_quotes (Tuple[str, str]): Brace or quote.
+             block (List[str]): Block (list of lines).
+
+         Returns:
+             List[str]: new patch bib after appending.
+         """
+         pre, _ = braces_or_quotes
+
+         temp = rf'[%\s]*(?:{"|".join(field_list)})'
+         regex_field = re.compile(rf"{temp}\s*=\s*{pre}", flags=re.I)
+         regex_field_abbr = re.compile(rf"{temp}\s*=\s*\w+[\w\-]*", flags=re.I)  # journal = EJC,
+         regex_termination = re.compile(r"\s*@[a-zA-Z]*{", flags=re.I)
+
+         # strip and append
+         line_index, len_data, new_block = 0, len(block), []
+         while line_index < len_data:
+             line = block[line_index]
+             line_index += 1
+             if regex_field.match(line) or regex_termination.match(line) or regex_field_abbr.match(line):
+                 new_line = line
+                 while line_index < len_data:
+                     line = block[line_index]
+                     if regex_field.match(line) or regex_termination.match(line) or regex_field_abbr.match(line):
+                         break
+                     else:
+                         if line.lstrip():
+                             new_line = new_line.rstrip() + " " + line.lstrip()  # append
+                         line_index += 1
+                 new_block.append(new_line)
+             else:
+                 new_block.append(line)
+         return new_block
+
+
+ class ExtractEntry(object):
+     def __init__(self) -> None:
+         pass
+
+     def extract(
+         self, field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
+     ) -> Tuple[List[str], List[str]]:
+         """Extract.
+
+         Args:
+             field_list (List[str]): field list.
+             brace_or_quote (Tuple[str, str]): (", ") or ({, }).
+             block (List[str]): the block.
+
+         Returns:
+             Tuple[List[str], List[str]]: main block, redundant part.
+         """
+         pre, post = brace_or_quote
+
+         temp = rf'[%\s]*(?:{"|".join(field_list)})'
+         regex_field_two = re.compile(rf"({temp}\s*=\s*{pre})(.*)(\n*)", flags=re.I)
+         regex_field_one = re.compile(rf"({temp}\s*=\s*{pre}.*{post})(.*)(\n*)", flags=re.I)
+         regex_field_abbr = re.compile(rf"({temp}\s*=\s*\w+[\w\-]*)(.*)(\n*)", flags=re.I)
+         regex_termination = re.compile(r"(\s*@[a-zA-Z]*{\s*[\w\-:/\\.\']*)(.*)(\n*)", flags=re.I)
+
+         main_list, redundant_list = [], []
+
+         for line in block:
+             new_line, redundant = "", ""
+             if mch := regex_termination.match(line):
+                 one, two, three = mch.groups()
+                 new_line = one + ",\n"
+                 if re.sub(r"[\s,\n\}]+", "", two):
+                     redundant = two + three
+
+             elif mch := regex_field_abbr.match(line):
+                 one, two, three = mch.groups()
+                 new_line = one + ",\n"
+                 if re.sub(r"[\s,\n\}]+", "", two):
+                     redundant = two + three
+
+             elif mch := regex_field_one.match(line):
+                 one, two, three = mch.groups()
+                 new_line = self._resub_brace_or_quote(pre, post, one + ",\n")
+                 if re.sub(r"[\s,\n\}]+", "", two):
+                     redundant = two + three
+
+             elif mch := regex_field_two.match(line):
+                 one, two, three = mch.groups()
+                 new_line = self._resub_brace_or_quote(pre, post, one + two.strip() + post + ",\n")
+
+             elif line.strip() == "}":
+                 pass
+
+             else:
+                 return [], block
+
+             if new_line:
+                 main_list.append(new_line)
+             if redundant:
+                 redundant_list.append(redundant)
+
+         # for enclosing "@[a-zA-Z]{"
+         if main_list:
+             main_list.append("}\n")
+         return main_list, redundant_list
+
+     def _resub_brace_or_quote(self, pre, post, line: str) -> str:
+         if post == "}":
+             if line.count(post) > line.count(pre):
+                 line = re.sub(r'(}[}\s\n,]*)$', '},\n', line)
+             line = add_brace_or_quote(pre, post, line)
+
+         elif post == '"':
+             if line.count(post) > line.count(pre):
+                 line = re.sub(r'("["\s\n,]*)$', '",\n', line)
+             line = add_brace_or_quote(pre, post, line)
+         return line
+
+
+ def add_brace_or_quote(pre, post, line: str):
+     if (cpre := line.count(pre)) != (cpost := line.count(post)):
+         line_list = list(line)
+         if cpre > cpost:
+             line_list = line_list[::-1]
+             line_list.insert(line_list.index(post), post * (cpre - cpost))
+             line_list = line_list[::-1]
+         else:
+             line_list.insert(line.index(pre), pre * (cpost - cpre))
+
+         line = "".join(line_list)
+     return line
+
+
+ class CheckEntry(object):
+     @staticmethod
+     def check(
+         field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
+     ) -> Tuple[Dict[str, List[str]], List[str], bool]:
+         """Check."""
+         pre, post = brace_or_quote
+
+         regex_entry = re.compile(r"\s*@[a-zA-Z]+{")
+         regex_field = re.compile(fr'\s*(?:{"|".join(field_list)})' + r"\s*=")
+         entry_flag, brace_flag = False, False  # minimal conditions
+         error_dict: Dict[str, List[str]] = {}
+         new_block = []
+         for line in block:
+             if regex_entry.match(line) and (not entry_flag):  # only if it appears once in a single patch bib
+                 if (line.count("{") != 1) or (line.count(",") != 1):
+                     error_dict.setdefault("Failed entry_type", []).append(line)
+                 else:
+                     entry_flag = True
+                     new_block.append(line)
+
+             elif regex_field.match(line):
+                 new_block.append(add_brace_or_quote(pre, post, line))
+
+             elif (line.strip() == "}") and (not brace_flag):  # only if it appears once in a single patch bib
+                 brace_flag = True
+                 new_block.append(line)
+
+             else:
+                 error_dict.setdefault("Redundant content", []).append(line)
+         return error_dict, new_block, entry_flag and brace_flag
+
+
+ if __name__ == "__main__":
+     pass
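A minimal end-to-end sketch of StandardizeEntryBlock on a multi-line entry (hypothetical entry; import path as in this diff). The continuation line is folded into its field, and leading indentation is dropped by the split step:

from pyeasyphd.bib.bibtexbase.standardize.do_on_entry_block import StandardizeEntryBlock

block = [
    "@article{smith2020,\n",
    "  title = {An Example\n",
    "           Title},\n",
    "  year = {2020},\n",
    "}\n",
]
new_block, implicit = StandardizeEntryBlock().standardize(block)
print("".join(new_block))
# @article{smith2020,
# title = {An Example Title},
# year = {2020},
# }
print(implicit)  # [] -- nothing had to be demoted to an implicit comment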
pyeasyphd-0.0.2/pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py
@@ -0,0 +1,35 @@
+ import re
+ from typing import List, Tuple
+
+
+ class StandardizePreambleBlock(object):
+     """Standardize preamble block."""
+
+     def __init__(self) -> None:
+         pass
+
+     def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
+         # @preamble{ "\providecommand{\noopsort}[1]{} " }
+         implicit_comments = []
+         regex_preamble = re.compile(
+             r"@preamble{" + r'\s*(")' + r"([\w\-\\\[\]\{\}\s]+)" + r'(")\s*' + r"(.*)(\n*)", re.DOTALL
+         )
+         mch = regex_preamble.match("".join(block))
+         if mch:
+             a, b, c, d, e = mch.groups()
+             if (a == '"') and (c == '"'):
+                 block = ["@preamble{ " + a + b.replace("\n", " ").strip() + c + " }\n"]
+
+                 if d and d.lstrip()[0] == "}":
+                     d = d.lstrip()[1:].lstrip()
+
+                 if d.strip():
+                     implicit_comments = [[d + e, __class__.__name__]]
+
+             else:
+                 implicit_comments = [["".join(block), __class__.__name__]]
+                 block = []
+         else:
+             implicit_comments = [["".join(block), __class__.__name__]]
+             block = []
+         return block, implicit_comments
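A minimal sketch mirroring the @preamble example quoted in the comment above (import path as in this diff). The quoted content is kept on one line with the trailing space inside the quotes stripped:

from pyeasyphd.bib.bibtexbase.standardize.do_on_preamble_block import StandardizePreambleBlock

block = ['@preamble{ "\\providecommand{\\noopsort}[1]{} " }\n']
new_block, implicit = StandardizePreambleBlock().standardize(block)
print(new_block)  # ['@preamble{ "\\providecommand{\\noopsort}[1]{}" }\n']
print(implicit)   # [] -- nothing trailed the closing brace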