pyeasyphd 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic. Click here for more details.

Files changed (80) hide show
  1. pyeasyphd/.python-version +1 -0
  2. pyeasyphd/Main.sublime-menu +43 -0
  3. pyeasyphd/__init__.py +0 -0
  4. pyeasyphd/bib/__init__.py +1 -0
  5. pyeasyphd/bib/bibtexbase/__init__.py +7 -0
  6. pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
  7. pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
  8. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
  9. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
  10. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
  11. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
  12. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
  13. pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
  14. pyeasyphd/bib/bibtexparser/__init__.py +47 -0
  15. pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
  16. pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
  17. pyeasyphd/bib/bibtexparser/library.py +207 -0
  18. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
  19. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
  20. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
  21. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
  22. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
  23. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
  24. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
  25. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
  26. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
  27. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
  28. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
  29. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
  30. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
  31. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
  32. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
  33. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
  34. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
  35. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
  36. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
  37. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
  38. pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
  39. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
  40. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
  41. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
  42. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
  43. pyeasyphd/bib/bibtexparser/model.py +481 -0
  44. pyeasyphd/bib/bibtexparser/splitter.py +151 -0
  45. pyeasyphd/bib/core/__init__.py +18 -0
  46. pyeasyphd/bib/core/convert_library_to_library.py +31 -0
  47. pyeasyphd/bib/core/convert_library_to_str.py +199 -0
  48. pyeasyphd/bib/core/convert_str_to_library.py +34 -0
  49. pyeasyphd/bib/core/convert_str_to_str.py +27 -0
  50. pyeasyphd/main/__init__.py +17 -0
  51. pyeasyphd/main/basic_input.py +149 -0
  52. pyeasyphd/main/pandoc_md_to.py +361 -0
  53. pyeasyphd/main/python_run_bib.py +73 -0
  54. pyeasyphd/main/python_run_md.py +235 -0
  55. pyeasyphd/main/python_run_tex.py +149 -0
  56. pyeasyphd/main/python_writers.py +212 -0
  57. pyeasyphd/pyeasyphd.py +72 -0
  58. pyeasyphd/pyeasyphd.sublime-settings +235 -0
  59. pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  60. pyeasyphd/tools/__init__.py +30 -0
  61. pyeasyphd/tools/compare/compare_bibs.py +234 -0
  62. pyeasyphd/tools/experiments_base.py +203 -0
  63. pyeasyphd/tools/format_save_bibs.py +178 -0
  64. pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
  65. pyeasyphd/tools/generate/generate_links.py +356 -0
  66. pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
  67. pyeasyphd/tools/replace/replace.py +81 -0
  68. pyeasyphd/tools/search/data.py +318 -0
  69. pyeasyphd/tools/search/search_base.py +118 -0
  70. pyeasyphd/tools/search/search_core.py +326 -0
  71. pyeasyphd/tools/search/search_keywords.py +227 -0
  72. pyeasyphd/tools/search/search_writers.py +288 -0
  73. pyeasyphd/tools/search/utils.py +152 -0
  74. pyeasyphd/tools/spider/process_spider_bib.py +247 -0
  75. pyeasyphd/tools/spider/process_spider_url.py +74 -0
  76. pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
  77. pyeasyphd/utils/utils.py +62 -0
  78. pyeasyphd-0.0.2.dist-info/METADATA +27 -0
  79. pyeasyphd-0.0.2.dist-info/RECORD +80 -0
  80. pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,288 @@
1
+ import copy
2
+ import os
3
+ from typing import Dict, List, Tuple
4
+
5
+ from pyadvtools import (
6
+ combine_content_in_list,
7
+ read_list,
8
+ write_list,
9
+ )
10
+
11
+ from ...bib.bibtexparser import Library
12
+ from ...main import PandocMdTo, PythonWriters
13
+ from ...tools.search.utils import (
14
+ combine_keywords_for_file_name,
15
+ combine_keywords_for_title,
16
+ keywords_type_for_title,
17
+ )
18
+
19
+
20
class WriteInitialResult(object):
    """Write initial results for single keyword.

    Args:
        options: dict

    Attributes:
        options (dict): options
    """

    def __init__(self, options: dict) -> None:
        self.options = options

        # Headings produced here sit one level below the combined/separate
        # writers, which use "##"/section.
        self._level_title_md = "###"
        self._level_title_tex = "subsection"
        self._pandoc_md_to = PandocMdTo(options)

    def main(
        self,
        path_initial: str,
        output_prefix: str,
        field: str,
        keywords_type: str,
        combine_keywords: str,
        library_for_abbr: "Library",
        library_for_zotero: "Library",
        library_for_save: "Library",
    ) -> Tuple[List[List[str]], List[str]]:
        """Write tex/md/bib outputs for one keyword.

        Returns:
            A pair ``(written_files, errors)``: ``written_files`` holds the full
            path of every written file (one single-element list per file, in
            tex/md/bib-abbr/bib-zotero/bib-save/basic/beauty/complex order) and
            ``errors`` collects pandoc md->md failures.
        """
        error_pandoc_md_md: List[str] = []

        # generate
        cite_keys = [entry.key for entry in library_for_abbr.entries]

        # update options
        _options = copy.deepcopy(self.options)
        _options["keep_entries_by_cite_keys"] = cite_keys
        _python_writer = PythonWriters(_options)

        # generate tex and md data
        data_list_tex, data_list_md, header = self.generate_content_tex_md(
            cite_keys, output_prefix, field, combine_keywords
        )

        # definition
        file_prefix = combine_keywords_for_file_name(combine_keywords)  # the file name prefix

        # write tex, md, and bib files
        data_list = [data_list_tex, data_list_md, library_for_abbr, library_for_zotero, library_for_save]
        mid_list = ["", "", "-abbr", "-zotero", "-save"]
        post_list = ["tex", "md", "bib", "bib", "bib"]
        path_write = os.path.join(path_initial, f"{field}-{keywords_type}")
        for i in range(len(post_list)):
            file_name = f"{file_prefix}{mid_list[i]}.{post_list[i]}"
            _python_writer.write_to_file(data_list[i], file_name, "w", path_write)

        # pandoc md to generate md file
        # bib_for_abbr; os.path.join instead of a hard-coded "/" for portability
        path_bib = os.path.join(path_write, f"{file_prefix}{mid_list[2]}.bib")
        data_list_pandoc_md = self._pandoc_md_to.pandoc_md_to_md(
            path_bib,
            path_write,
            path_write,
            f"{file_prefix}.md",
            f"{file_prefix}-pandoc.md",
        )

        # main part
        # generate some md output data
        data_basic_md: List[str] = []
        data_beauty_md: List[str] = []
        data_complex_md: List[str] = []
        if data_list_pandoc_md:
            data_basic_md, data_beauty_md, data_complex_md = self.generate_basic_beauty_complex_md(
                header, cite_keys, data_list_pandoc_md, library_for_zotero
            )
        else:
            # Bug fix: the generated file is `{file_prefix}-pandoc.md` (hyphen,
            # see pandoc_md_to_md above), so report that name rather than the
            # never-created `{file_prefix}_pandoc.md`.
            error_pandoc_md_md.append(f"- pandoc full false: {file_prefix}-pandoc.md" + "\n")

        # write basic beauty complex md files
        basic_beauty_complex = ["-basic", "-beauty", "-complex"]
        for d, name in zip([data_basic_md, data_beauty_md, data_complex_md], basic_beauty_complex):
            write_list(d, "{}{}.md".format(file_prefix, name), "w", path_write)

        # save all (tex, md, bib) files
        x = [f"{i}.{j}" for i, j in zip(mid_list, post_list)]
        x.extend([f"{i}.md" for i in basic_beauty_complex])
        data_temp = [[os.path.join(path_write, file_prefix + i)] for i in x]
        return data_temp, error_pandoc_md_md

    def generate_basic_beauty_complex_md(
        self, header: str, cite_key_list: List[str], data_list_pandoc_md: List[str], library_for_zotero: "Library"
    ) -> Tuple[List[str], List[str], List[str]]:
        """Split pandoc md output into basic/beauty/complex bodies, one bullet per cite key.

        Returns three line lists; all empty when pandoc produced no usable data.
        """
        data_basic_md, data_beauty_md, data_complex_md = [], [], []

        # library
        _options = copy.deepcopy(self.options)
        _python_writer = PythonWriters(_options)
        key_url_http_bib_dict = _python_writer.output_key_url_http_bib_dict(library_for_zotero)

        key_basic_dict, key_beauty_dict, key_complex_dict = self._pandoc_md_to.generate_key_data_dict(
            data_list_pandoc_md, key_url_http_bib_dict
        )

        if key_basic_dict and key_beauty_dict and key_complex_dict:
            data_basic_md, data_beauty_md, data_complex_md = [header + "\n"], [header + "\n"], [header + "\n"]
            for i in range(length := len(cite_key_list)):
                data_basic_md.extend(self._convert_to_special_list(key_basic_dict.get(cite_key_list[i], [])))
                data_beauty_md.extend(self._convert_to_special_list(key_beauty_dict.get(cite_key_list[i], [])))
                data_complex_md.extend(self._convert_to_special_list(key_complex_dict.get(cite_key_list[i], [])))
                # Blank separator line between items, but not after the last one.
                if i < (length - 1):
                    data_basic_md.append("\n")
                    data_beauty_md.append("\n")
                    data_complex_md.append("\n")
        return data_basic_md, data_beauty_md, data_complex_md

    @staticmethod
    def _convert_to_special_list(data_list: List[str]) -> List[str]:
        """Turn a block of lines into a markdown list item (mutates in place).

        The first line gets a `- ` bullet; each line following a newline-terminated
        line is indented so it stays attached to the same item.
        """
        if len(data_list) > 0:
            data_list[0] = "- " + data_list[0]
            for j in range(len(data_list) - 1):
                if data_list[j][-1] == "\n":
                    data_list[j + 1] = " " + data_list[j + 1]
        return data_list

    def generate_content_tex_md(
        self, cite_key_list: List[str], output_prefix: str, field: str, combine_keywords: str
    ) -> Tuple[List[str], List[str], str]:
        """Generate the tex body, md body, and md header line for the cite keys."""
        c_k_f_t = combine_keywords_for_title(combine_keywords)

        number_references = len(cite_key_list)
        _title = f"{output_prefix} {field} contains {number_references} {c_k_f_t}"

        # tex: \nocite every key, then print the bibliography.
        tex_header = f"\\{self._level_title_tex}" + "{" + _title + "}\n"
        tex_body = ["\\nocite{" + f"{c_k}" + "}\n" for c_k in cite_key_list]
        tex_tail = "\\printbibliography\n\n\\ifx \\clearPage \\undefined \\else \\clearpage \\fi\n"
        data_list_tex = combine_content_in_list([[tex_header], ["\n"], tex_body, ["\n"], [tex_tail]])

        # md: one pandoc citation bullet per key.
        md_header = f"{self._level_title_md}" + " " + _title + "\n"
        md_body = [r"- [@" + f"{c_k}" + "]\n" for c_k in cite_key_list]
        data_list_md = combine_content_in_list([[md_header], ["\n"], md_body])
        return data_list_tex, data_list_md, md_header
161
+
162
+
163
class WriteSeparateResult(object):
    """Append each keyword's generated files into per-field cumulative outputs."""

    def __init__(self) -> None:
        # One heading level above WriteInitialResult ("###"/subsection).
        self._level_title_md = "##"
        self._level_title_tex = "section"

    def main(
        self, data_temp: List[List[str]], field: str, keywords_type: str, combine_keywords: str, path_separate: str
    ) -> None:
        """Append the bib/md files listed in ``data_temp`` under ``path_separate``."""
        title = f"{field.title()} contains {keywords_type_for_title(keywords_type)}"
        file_prefix = combine_keywords_for_file_name(combine_keywords)  # the file name prefix

        mid_list = ["", "", "-abbr", "-zotero", "-save", "-basic", "-beauty", "-complex"]
        post_list = ["tex", "md", "bib", "bib", "bib", "md", "md", "md"]

        # Skip the first two slots (plain tex/md); processing starts at "-abbr".
        # len(data_temp) = len(mid_list) = len(post_list) = 8
        for idx in range(mid_list.index("-abbr"), len(data_temp)):
            suffix, ext = mid_list[idx], post_list[idx]
            target_dir = os.path.join(path_separate, f"{keywords_type}", f"{field}-{ext}{suffix}")
            target_file = os.path.join(target_dir, f"{file_prefix}.{ext}")
            content = read_list(data_temp[idx][0], "r", None)
            if not os.path.isfile(target_file):
                # First append into this file: prepend a field-level heading.
                if ext == "md":
                    content.insert(0, f"{self._level_title_md} {title}\n\n")
                elif ext == "tex":
                    content.insert(0, f"\\{self._level_title_tex}" + "{" + title + "}\n\n")
                else:
                    content.insert(0, "\n")
            write_list(content, target_file, "a", None, False, False)  # Compulsory `a`
        return None
196
+
197
+
198
class WriteAbbrCombinedResults(object):
    """Write combined results for abbr (such as `TEVC`, `PNAS`).

    Args:
        options: dict

    Attributes:
        options (dict): options
        pandoc_md_basic_to_pdf (bool): whether to convert basic md to pdf
        pandoc_md_beauty_to_pdf (bool): whether to convert beauty md to pdf
        pandoc_md_complex_to_pdf (bool): whether to convert complex md to pdf
        pandoc_md_basic_to_html (bool): whether to convert basic md to html
        pandoc_md_beauty_to_html (bool): whether to convert beauty md to html
        pandoc_md_complex_to_html (bool): whether to convert complex md to html

    """

    def __init__(self, options: dict) -> None:
        self.pandoc_md_basic_to_pdf: bool = options.get("pandoc_md_basic_to_pdf", False)
        self.pandoc_md_beauty_to_pdf: bool = options.get("pandoc_md_beauty_to_pdf", False)
        self.pandoc_md_complex_to_pdf: bool = options.get("pandoc_md_complex_to_pdf", False)
        self.pandoc_md_basic_to_html: bool = options.get("pandoc_md_basic_to_html", False)
        self.pandoc_md_beauty_to_html: bool = options.get("pandoc_md_beauty_to_html", False)
        self.pandoc_md_complex_to_html: bool = options.get("pandoc_md_complex_to_html", True)

        self._level_title_md = "##"
        self._level_title_tex = "section"
        self._pandoc_md_to = PandocMdTo(options)

    def main(
        self, search_field_list, keywords_type: str, field_data_dict: Dict[str, List[List[str]]], path_combine: str
    ) -> Tuple[List[str], List[str]]:
        """Combine the per-keyword files of each field, then optionally render pdf/html.

        Returns:
            ``(pdf_errors, html_errors)`` collected from the pandoc conversions.
        """
        path_subsection = os.path.join(path_combine, "tex-subsection")
        path_md = os.path.join(path_combine, "md")
        path_bib = os.path.join(path_combine, "bib")

        mid_list = ["", "", "-abbr", "-zotero", "-save", "-basic", "-beauty", "-complex"]
        post_list = ["tex", "md", "bib", "bib", "bib", "md", "md", "md"]
        path_list = [path_subsection, path_md, path_bib, path_bib, path_bib]
        for i in ["-basic", "-beauty", "-complex"]:
            path_list.append(os.path.join(path_combine, f"md{i}"))
        # len(mid_list) == len(post_list) == len(path_list) == 8

        k_t_f_t = keywords_type_for_title(keywords_type)

        error_pandoc_md_pdf, error_pandoc_md_html = [], []
        for field in search_field_list:
            if not field_data_dict.get(field):
                continue

            # write files
            file_prefix = f"{field}-{keywords_type}"  # the file name prefix
            _title = f"{field.title()} contains {k_t_f_t}"

            for j in range(0, len(post_list)):
                temp = combine_content_in_list([read_list(file, "r") for file in field_data_dict[field][j]], ["\n"])
                if post_list[j] == "md":
                    temp.insert(0, f"{self._level_title_md}" + " " + _title + "\n\n")
                elif post_list[j] == "tex":
                    temp.insert(0, f"\\{self._level_title_tex}" + "{" + _title + "}\n\n")
                write_list(temp, f"{file_prefix}{mid_list[j]}.{post_list[j]}", "w", path_list[j])

            # generate tex pdf html
            # for tex
            self._pandoc_md_to.generate_tex_content(file_prefix, path_subsection, path_bib, path_combine)

            # for pdf
            for i in ["basic", "beauty", "complex"]:
                # getattr instead of eval: same attribute lookup without
                # compiling and executing a dynamically built source string.
                if getattr(self, f"pandoc_md_{i}_to_pdf"):
                    error_flag_pdf = self._pandoc_md_to.pandoc_md_to_pdf(
                        os.path.join(path_combine, f"md-{i}"),
                        f"{file_prefix}-{i}.md",
                        os.path.join(path_combine, f"pdf-{i}"),
                        f"{file_prefix}-{i}.pdf",
                    )
                    if error_flag_pdf:
                        error_pandoc_md_pdf.append(error_flag_pdf)

            # for html
            for i in ["basic", "beauty", "complex"]:
                if getattr(self, f"pandoc_md_{i}_to_html"):
                    error_flag_html = self._pandoc_md_to.pandoc_md_to_html(
                        os.path.join(path_combine, f"md-{i}"),
                        os.path.join(path_combine, f"html-{i}"),
                        f"{file_prefix}-{i}.md",
                        f"{file_prefix}-{i}.html",
                        True
                    )
                    if error_flag_html:
                        error_pandoc_md_html.append(error_flag_html)
        return error_pandoc_md_pdf, error_pandoc_md_html
@@ -0,0 +1,152 @@
1
+ import os
2
+ import re
3
+ from typing import Dict, List, Tuple, Union
4
+
5
+ from pyadvtools import (
6
+ IterateSortDict,
7
+ is_list_contain_list_contain_str,
8
+ is_list_contain_str,
9
+ write_list,
10
+ )
11
+
12
+
13
def switch_keywords_list(xx: Union[List[str], List[List[str]]]) -> Tuple[List[List[str]], str]:
    """Switch keyword.

    Input: ["evolutionary", "algorithm"] or [["evolution"], ["evolutionary"]]
    Output: [["evolutionary", "algorithm"]] or [["evolution"], ["evolutionary"]]
    """
    def _wrap(group: List[str]) -> List[str]:
        # Surround every keyword with regex word boundaries.
        return [rf"\b{x}\b" for x in group]

    groups: List[List[str]] = [[]]
    if is_list_contain_str(xx):
        groups = [_wrap(xx)]
    elif is_list_contain_list_contain_str(xx):
        if len(xx) == 1:
            groups = [_wrap(xx[0])]
        elif len(xx) == 2:
            groups = [_wrap(xx[0]), _wrap(xx[1])]
        else:
            print(f"Not standard keywords: {xx}")
            return groups, ""
    else:
        return groups, ""

    combined = "_and_".join(groups[0])
    if len(groups) == 2:
        combined += "_without_{}".format("_and_".join(groups[1]))

    # Encode regex constructs as single digits so the result can double as a
    # token; combine_keywords_for_title reverses this mapping.
    # ['evol(?:ution|utionary) strateg(?:y|ies)', 'population(?:| |-)based', 'network(?:|s)']
    # '\bevol(?:ution|utionary) strateg(?:y|ies)\b_and_\bpopulation(?:| |-)based\b_and_\bnetwork(?:|s)\b'
    combined = combined.replace(r"\b", "")
    # 'evol(?:ution|utionary) strateg(?:y|ies)_and_population(?:| |-)based_and_network(?:|s)'
    combined = re.sub(r"\(\?:[\w\s\-|]*\) ", "0 ", combined)
    # 'evol0 strateg(?:y|ies)_and_population(?:| |-)based_and_network(?:|s)'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)$", "1", combined)
    # 'evol0 strateg(?:y|ies)_and_population(?:| |-)based_and_network1'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)_", "2_", combined)
    # 'evol0 strateg2_and_population(?:| |-)based_and_network1'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)", "3", combined)
    # 'evol0 strateg2_and_population3based_and_network1'
    combined = combined.replace("/", "4").replace(" ", "5")
    # 'evol05strateg2_and_population3based_and_network1'
    return groups, combined
54
+
55
+
56
def combine_keywords_for_title(combine_keywords: str) -> str:
    """Decode a combined-keywords token back into readable title text."""
    text = combine_keywords.replace("_without_", " without ").replace("_and_", "; ")
    # Digits 0-5 were introduced by switch_keywords_list to encode regex pieces:
    # 0/1/2 vanish, 3 -> "-", 4 -> "/", 5 -> " ".
    return text.translate(str.maketrans({"0": "", "1": "", "2": "", "3": "-", "4": "/", "5": " "}))


def combine_keywords_for_file_name(combine_keywords: str) -> str:
    """Turn the decoded title text into a file-name-safe identifier."""
    name = combine_keywords_for_title(combine_keywords)
    for old, new in (("/", "-"), ("; ", "_and_"), (" ", "_")):
        name = name.replace(old, new)
    return name
74
+
75
+
76
def switch_keywords_type(keywords_type: str) -> str:
    """Normalize a keywords-type label into a file-system friendly token."""
    cleaned = keywords_type.replace("/", "-").replace(" ", "_")
    # Collapse separator runs produced by the substitutions above.
    cleaned = re.sub(r"-+", "-", cleaned)
    return re.sub(r"_+", "_", cleaned).strip()
82
+
83
+
84
def keywords_type_for_title(keywords_type: str) -> str:
    """Turn a keywords-type token back into readable title text."""
    return keywords_type.replace("_", " ").strip()
87
+
88
+
89
def extract_information(old_dict: Dict[str, Dict[str, Dict[str, Dict[str, Dict[str, int]]]]], path_output: str) -> None:
    """Pivot abbr->entry_type->keyword_type->keyword->field counts and write one
    markdown count table per (entry_type, field) under ``path_output``.
    """
    # Re-nest as entry_type -> field -> keyword_type -> keyword -> abbr -> count.
    new_dict: Dict[str, Dict[str, Dict[str, Dict[str, Dict[str, int]]]]] = {}
    for abbr in old_dict:
        for entry_type in old_dict[abbr]:
            for keyword_type in old_dict[abbr][entry_type]:
                for keyword in old_dict[abbr][entry_type][keyword_type]:
                    for field, no in old_dict[abbr][entry_type][keyword_type][keyword].items():
                        (
                            new_dict.setdefault(entry_type, {})
                            .setdefault(field, {})
                            .setdefault(keyword_type, {})
                            .setdefault(keyword, {})
                            .update({abbr: no})
                        )

    new_dict = IterateSortDict(False).dict_update(new_dict)

    for entry_type in new_dict:
        for field in new_dict[entry_type]:
            data_list: List[str] = []
            for keyword_type in new_dict[entry_type][field]:
                for keyword, abbr_counts in new_dict[entry_type][field][keyword_type].items():
                    abbr_list = sorted(abbr_counts)  # sorted(dict) iterates keys directly
                    num_list = [abbr_counts[abbr] for abbr in abbr_list]

                    # Avoid repeating identical header/separator rows; distinct
                    # abbr sets still produce their own header lines.
                    header = f'|Keywords Types|Keywords|{"|".join(abbr_list)}|\n'
                    if header not in data_list:
                        data_list.append(header)

                    separator = f'|-|-|{"|".join(["-"] * len(abbr_list))}|\n'
                    if separator not in data_list:
                        data_list.append(separator)

                    # Separate name to avoid clobbering the loop variable.
                    keyword_name = combine_keywords_for_file_name(keyword)
                    data_list.append(f'|{keyword_type}|{keyword_name}|{"|".join(str(n) for n in num_list)}|\n')

            write_list(data_list, f"{field}-keywords_count.md", "w", os.path.join(path_output, entry_type), False)
128
+
129
+
130
+ temp_html_style = """ <style>
131
+ html {font-size: 19px;}
132
+ body {margin: 0 auto; max-width: 22em;}
133
+ table {
134
+ border-collapse: collapse;
135
+ border: 2px solid rgb(200,200,200);
136
+ letter-spacing: 1px;
137
+ font-size: 0.8rem;
138
+ }
139
+ td, th {
140
+ border: 1px solid rgb(190,190,190);
141
+ padding: 10px 20px;
142
+ }
143
+ td {text-align: center;}
144
+ caption {padding: 12px;}
145
+ </style>
146
+ </head>
147
+ <body>
148
+ """
149
+
150
+
151
+ if __name__ == "__main__":
152
+ pass
@@ -0,0 +1,247 @@
1
+ import copy
2
+ import os
3
+ import re
4
+ import time
5
+
6
+ from pyadvtools import (
7
+ IterateCombineExtendDict,
8
+ iterate_obtain_full_file_names,
9
+ read_list,
10
+ standard_path,
11
+ write_list,
12
+ )
13
+
14
+ from ...bib.bibtexparser.library import Library
15
+ from ...main import PythonRunBib, PythonWriters
16
+ from ..experiments_base import generate_readme
17
+ from ..format_save_bibs import format_entries_for_abbr_zotero_save, generate_statistic_information
18
+
19
# Preprint-server abbreviations that move_spider_bib never archives.
EXCLUDE_ABBR_LIST = ["arxiv", "biorxiv", "ssrn"]


class ProcessSpiderBib(object):
    """Process spider bib.

    Args:
        path_abbr: The path of the abbreviation folder.
        abbr_standard: The standard abbreviation.

    Attributes:
        path_abbr: The path of the abbreviation folder.
        abbr_standard: The standard abbreviation.
    """

    def __init__(self, path_abbr: str, abbr_standard: str) -> None:
        # Expand "~" and environment variables so shorthand paths work.
        self.path_abbr = os.path.expandvars(os.path.expanduser(path_abbr))
        self.abbr_standard = abbr_standard

        # Parse/format options shared by every bib operation in this class.
        self._options = {
            "is_standardize_bib": True,  # default is True
            # Substitutions applied to field values (paired index-wise with
            # substitute_new_list): strip HTML-like tags, collapse repeated
            # spaces, semicolons, and commas.
            "substitute_old_list": [
                r"(<[a-zA-Z\-]+\s*/*\s*>)",
                r'(</[a-zA-Z\-]+>)',
                r'(<[a-zA-Z\-]+ [^\s]+="[^>]+?"\s*/*\s*>)',
                r"([ ]+)",
                r";[; ]*;",
                r",[, ]*,"
            ],
            "substitute_new_list": ["", "", "", " ", ";", ","],
            "is_sort_entry_fields": True,  # default is False
            "is_sort_blocks": True,  # default is False
            "sort_entries_by_field_keys_reverse": False,  # default is True
            "empty_entry_cite_keys": True,
        }

        self._python_bib = PythonRunBib(self._options)

    def format_spider_bib(self, write_bib: bool = False) -> None:
        """Format spider bib."""
        file_list = iterate_obtain_full_file_names(self.path_abbr, ".bib", False)

        if write_bib:
            # Writing run: README.md is rebuilt, so discard the old one first.
            if os.path.exists(readme := os.path.join(self.path_abbr, "README.md")):
                os.remove(readme)

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        for f in file_list:
            print("*" * 5 + f" Format {os.path.basename(f)} " + "*" * 5)

            data_list = read_list(f, "r")

            # standardize
            entry_type_year_volume_number_month_entry_dict = self._python_bib.parse_to_nested_entries_dict(data_list)
            if not write_bib:
                # Dry run: parsing alone surfaces problems; nothing is written.
                continue

            # just for the necessary part
            # Whitespace-insensitive snapshot of the existing README lines.
            old_readme_md = [re.sub(r"[ ]+", "", line) for line in read_list("README.md", "r", self.path_abbr)]
            new_readme_md = []
            new_entry_list = []

            for entry_type in entry_type_year_volume_number_month_entry_dict:
                # presumably dict keys are already lowercase — .get(lower) would
                # return {} otherwise; TODO confirm in parse_to_nested_entries_dict
                new_dict = entry_type_year_volume_number_month_entry_dict.get(entry_type.lower(), {})

                # for README.md
                readme_md = generate_readme(self.abbr_standard, entry_type.lower(), new_dict)
                # Drop the leading 3 lines (heading) when README content already
                # exists, and skip lines already present (whitespace-insensitive).
                readme_md = readme_md[3:] if (old_readme_md or new_readme_md) else readme_md
                readme_md = [line for line in readme_md if re.sub(r"[ ]+", "", line) not in old_readme_md]
                new_readme_md.extend(readme_md)

                # for bib
                entry_list = IterateCombineExtendDict().dict_update(copy.deepcopy(new_dict))
                new_entry_list.extend(entry_list)

            write_list(new_readme_md, "README.md", "a", self.path_abbr, False)
            _python_writer.write_to_file(new_entry_list, f, "w", None, False)
        return None

    def check_spider_bib(self, delete_duplicate_in_bibs: bool = False) -> None:
        """Check bib."""
        # Pair each .bib with the .csv of the same base name.
        bibs_name = iterate_obtain_full_file_names(self.path_abbr, ".bib", False)
        bibs_name = [[f, os.path.basename(f).split(".")[0].strip()] for f in bibs_name]

        urls_name = iterate_obtain_full_file_names(self.path_abbr, ".csv", False)
        urls_name = [[f, os.path.basename(f).split(".")[0].strip()] for f in urls_name]

        url_base_names = [name[-1] for name in urls_name]

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        for name in bibs_name:
            bib_base_name = name[-1]
            if bib_base_name not in url_base_names:
                print(f"{bib_base_name}.csv not in the folder `url`.")
                continue

            full_bib, full_url = name[0], urls_name[url_base_names.index(bib_base_name)][0]

            print("*" * 5 + f" Check {os.path.basename(full_bib)} and {os.path.basename(full_url)} " + "*" * 5)
            bib_list = read_list(full_bib, "r")

            # Check duplicated blocks in bib file
            library = self._python_bib.parse_to_single_standard_library(bib_list)

            # Group entries by DOI when present, else by URL.
            url_bib_dict = {}
            for entry in library.entries:
                doi = entry["doi"] if "doi" in entry else ""
                url_ = entry["url"] if "url" in entry else ""
                url = doi if doi else url_
                url_bib_dict.setdefault(url, []).append(entry)

            duplicate_url, new_entries = [], []
            for url in url_bib_dict:
                if len(url_bib_dict[url]) > 1:
                    duplicate_url.append(url)
                if delete_duplicate_in_bibs:
                    # Keep only the first entry per DOI/URL group.
                    new_entries.append(url_bib_dict[url][0])

            # Delete duplicated blocks in bib file
            if duplicate_url:
                print(f"Duplicates in {full_bib}: {duplicate_url}\n")
            if duplicate_url and delete_duplicate_in_bibs:
                _python_writer.write_to_file(new_entries, full_bib, "w", None, False)
        return None

    def move_spider_bib(self, path_shutil: str) -> None:
        # Move settled entries into the archive, then delete them from the
        # spider folder. Preprint servers are excluded entirely.
        if self.abbr_standard.lower() in EXCLUDE_ABBR_LIST:
            return None

        # Move
        print("*" * 5 + f" Start moving {self.abbr_standard} ... " + "*" * 5)
        path_move = os.path.join(path_shutil, self.abbr_standard)
        entry_type_entry_dict = {}
        # NOTE(review): path_abbr (a path) is passed where other call sites pass
        # a line list — presumably the parser accepts both; confirm.
        library = PythonRunBib({}).parse_to_single_standard_library(self.path_abbr)
        for entry in library.entries:
            entry_type_entry_dict.setdefault(entry.entry_type, []).append(entry)
        for entry_type in entry_type_entry_dict:
            format_entries_for_abbr_zotero_save(
                self.abbr_standard,
                path_move,
                Library(entry_type_entry_dict[entry_type]),
                combine_year_length=1,
                default_year_list=self._default_year_list(entry_type),
                write_flag_bib="a",
                check_bib_exist=False,
                write_flag_readme="a",
                check_md_exist=False,
                options=self._options,
            )
        generate_statistic_information(path_move)
        print("*" * 5 + " Successfully moving ... " + "*" * 5)

        # Delete
        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)
        print("*" * 5 + f" Start deleting {self.abbr_standard} ... " + "*" * 5)
        bibs = iterate_obtain_full_file_names(self.path_abbr, ".bib")
        for bib in bibs:
            # Keep only entries whose year is NOT in the archived range.
            new_entries = []
            library = self._python_bib.parse_to_single_standard_library(read_list(bib, "r"))
            for entry in library.entries:
                year = entry["year"] if "year" in entry else ""
                if year not in self._default_year_list(entry.entry_type):
                    new_entries.append(entry)
            _python_writer.write_to_file(new_entries, bib, "w", None, False, True, True, False, True)
        print("*" * 5 + " Successfully deleting ... " + "*" * 5)

    @staticmethod
    def _default_year_list(entry_type) -> list:
        """Years considered settled (archivable) for the given entry type."""
        year = int(time.strftime("%Y", time.localtime()))
        month = int(time.strftime("%m", time.localtime()))
        # After March, include one more recent year for articles.
        m = 0 if month <= 3 else 1
        if entry_type == "article":
            default_year_list = [str(i) for i in range(1800, year + m - 1)]
        elif entry_type == "inproceedings":
            # Proceedings may carry next year's date, so include year + 1.
            default_year_list = [str(i) for i in range(1800, year + 2)]
        else:
            default_year_list = [str(i) for i in range(1800, year + m - 1)]
        return default_year_list

    def simplify_early_access(self):
        # for IEEE Early Access
        # Remove early-access records that have since appeared in the regular
        # journal folder (spider_j mirrors spider_j_e).
        path_ieee_early_access = self.path_abbr
        path_ieee = path_ieee_early_access.replace("spider_j_e", "spider_j")

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        print(f"***** Simplify {self.abbr_standard} *****")
        path_url_ieee_early_access = os.path.join(path_ieee_early_access, "url")
        path_bib_ieee_early_access = os.path.join(path_ieee_early_access, "bib")
        path_url_ieee = os.path.join(path_ieee, "url")

        # for txt urls
        # Drop early-access URLs already present in any published-issue list.
        data_list = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)
        for name in [f for f in os.listdir(path_url_ieee) if f.endswith(".txt")]:
            temp_data_list = read_list(name, "r", path_url_ieee)
            data_list = list(set(data_list).difference(set(temp_data_list)))
        write_list(sorted(data_list), f"{self.abbr_standard}_0.txt", "w", path_url_ieee_early_access, False)

        # for csv urls
        # Keep only csv rows whose URL survived the txt pruning above.
        data_list_csv = read_list(f"{self.abbr_standard}_0.csv", "r", path_url_ieee_early_access)
        data_list_txt = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)
        data_list = list(set(data_list_csv).intersection(set(data_list_txt)))
        write_list(sorted(data_list), f"{self.abbr_standard}_0.csv", "w", path_url_ieee_early_access, False)

        # for bibs
        # Keep only bib entries whose url matches a surviving URL.
        data_list_bib = read_list(f"{self.abbr_standard}_0.bib", "r", path_bib_ieee_early_access)
        data_list_url = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)

        entries = []
        library = self._python_bib.parse_to_single_standard_library(data_list_bib)
        for url in data_list_url:
            for entry in library.entries:
                # NOTE(review): raises KeyError if an entry lacks "url" — other
                # methods here guard with `"url" in entry`; confirm intent.
                if standard_path(url) == standard_path(entry["url"]):
                    entries.append(entry)
                    break

        _python_writer.write_to_file(
            entries, f"{self.abbr_standard}_0.bib", "w", path_bib_ieee_early_access, False, True, True, True
        )