pyeasyphd 0.4.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +5 -0
- pyeasyphd/data/templates/csl/apa-no-ampersand.csl +2183 -0
- pyeasyphd/data/templates/csl/apa.csl +2133 -0
- pyeasyphd/data/templates/csl/ieee.csl +512 -0
- pyeasyphd/data/templates/tex/Article.tex +38 -0
- pyeasyphd/data/templates/tex/Article_Header.tex +29 -0
- pyeasyphd/data/templates/tex/Article_Tail.tex +3 -0
- pyeasyphd/data/templates/tex/Beamer_Header.tex +79 -0
- pyeasyphd/data/templates/tex/Beamer_Tail.tex +14 -0
- pyeasyphd/data/templates/tex/Style.tex +240 -0
- pyeasyphd/data/templates/tex/TEVC_Header.tex +52 -0
- pyeasyphd/data/templates/tex/TEVC_Tail.tex +4 -0
- pyeasyphd/data/templates/tex/eisvogel.tex +1064 -0
- pyeasyphd/data/templates/tex/math.tex +201 -0
- pyeasyphd/data/templates/tex/math_commands.tex +677 -0
- pyeasyphd/data/templates/tex/nextaimathmacros.sty +681 -0
- pyeasyphd/main/__init__.py +6 -0
- pyeasyphd/main/basic_input.py +101 -0
- pyeasyphd/main/pandoc_md_to.py +380 -0
- pyeasyphd/main/python_run_md.py +320 -0
- pyeasyphd/main/python_run_tex.py +200 -0
- pyeasyphd/pyeasyphd.py +86 -0
- pyeasyphd/pyeasyphd.sublime-settings +100 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/scripts/__init__.py +34 -0
- pyeasyphd/scripts/_base.py +65 -0
- pyeasyphd/scripts/run_article_md.py +101 -0
- pyeasyphd/scripts/run_article_tex.py +94 -0
- pyeasyphd/scripts/run_beamer_tex.py +84 -0
- pyeasyphd/scripts/run_compare.py +71 -0
- pyeasyphd/scripts/run_format.py +62 -0
- pyeasyphd/scripts/run_generate.py +211 -0
- pyeasyphd/scripts/run_replace.py +34 -0
- pyeasyphd/scripts/run_search.py +251 -0
- pyeasyphd/tools/__init__.py +12 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +181 -0
- pyeasyphd/tools/generate/generate_html.py +166 -0
- pyeasyphd/tools/generate/generate_library.py +203 -0
- pyeasyphd/tools/generate/generate_links.py +400 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +398 -0
- pyeasyphd/tools/search/data.py +282 -0
- pyeasyphd/tools/search/search_base.py +146 -0
- pyeasyphd/tools/search/search_core.py +400 -0
- pyeasyphd/tools/search/search_keywords.py +229 -0
- pyeasyphd/tools/search/search_writers.py +350 -0
- pyeasyphd/tools/search/utils.py +190 -0
- pyeasyphd/utils/utils.py +99 -0
- pyeasyphd-0.4.42.dist-info/METADATA +33 -0
- pyeasyphd-0.4.42.dist-info/RECORD +53 -0
- pyeasyphd-0.4.42.dist-info/WHEEL +4 -0
- pyeasyphd-0.4.42.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
from pybibtexer.bib.bibtexparser import Library
|
|
5
|
+
from pybibtexer.main import PythonRunBib, PythonWriters
|
|
6
|
+
|
|
7
|
+
from ...main import BasicInput
|
|
8
|
+
from .search_writers import WriteInitialResult, WriteSeparateResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def search_keywords_core(
    keywords_list_list: list[list[str]], library: "Library", field: str
) -> "tuple[Library, Library]":
    """Search keywords in a specified field such as title, abstract, or keywords.

    Each keyword is treated as a regular-expression pattern and matched
    case-insensitively against the entry's field content (with BibTeX braces
    stripped first). An entry matches when *all* patterns in
    ``keywords_list_list[0]`` are found; if a second list
    ``keywords_list_list[1]`` is present, any hit from it vetoes the match.

    Args:
        keywords_list_list (list[list[str]]): One (include) or two
            (include, exclude) lists of regex keyword patterns.
        library (Library): Bibliography library to search.
        field (str): Field to search in (e.g., 'title', 'abstract', 'keywords').

    Returns:
        tuple[Library, Library]: Tuple containing (matching_library, non_matching_library).
    """
    search_library = []
    no_search_library = []

    for entry in library.entries:
        flag = False
        content = entry[field] if field in entry else ""
        if content:
            # Strip BibTeX brace protection so patterns match the plain text.
            # str.replace is the idiomatic (and faster) equivalent of
            # re.sub for removing literal single characters.
            content = content.replace("{", "").replace("}", "")

            # All keywords from keywords_list_list[0] should be found in bib.
            flag = all(re.search(keyword, content, flags=re.I) for keyword in keywords_list_list[0])
            if flag and (len(keywords_list_list) == 2):
                # Any keyword from keywords_list_list[1] found in bib will result in a False flag.
                flag = not any(re.search(keyword, content, flags=re.I) for keyword in keywords_list_list[1])

        if flag:
            search_library.append(entry)
        else:
            no_search_library.append(entry)

    return Library(search_library), Library(no_search_library)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class SearchInitialResult(BasicInput):
    """Class for searching and processing initial results.

    Runs ``search_keywords_core`` over a sequence of fields, consuming the
    library as it goes: entries matched in an earlier field are removed from
    the pool searched for later fields (unless
    ``deepcopy_library_for_every_field`` restores the full library each time).
    For each field's matches it writes initial tex/bib/md output and collects
    data for later combined writing.

    Args:
        options (dict): Configuration options.

    Attributes:
        options (dict): Configuration options.
        print_on_screen (bool): Whether to print results on screen. Defaults to False.
        deepcopy_library_for_every_field (bool): Whether to deep copy library for every field. Defaults to False.
    """

    def __init__(self, options: dict) -> None:
        """Initialize SearchInitialResult with configuration options.

        Args:
            options (dict): Configuration options.
        """
        super().__init__(options)

        self.print_on_screen: bool = options.get("print_on_screen", False)
        self.deepcopy_library_for_every_field: bool = options.get("deepcopy_library_for_every_field", False)

        self._python_bib = PythonRunBib(options)

        # Default empty_entry_cite_keys to True, but let any caller-supplied
        # value in self.options take precedence.
        # NOTE(review): assumes `self.options` is populated by BasicInput.__init__ — confirm.
        _options = {}
        _options["empty_entry_cite_keys"] = True
        _options.update(self.options)
        self._python_writer = PythonWriters(_options)

    def main(
        self,
        search_field_list: list[str],
        path_initial: str,
        library: Library,
        keywords_type: str,
        keywords_list_list: list[list[str]],
        combine_keywords: str,
        output_prefix: str,
        path_separate: str,
    ) -> tuple[list[str], dict[str, list[list[str]]], dict[str, int], Library]:
        """Main search method for processing search results.

        Args:
            search_field_list (list[str]): list of fields to search.
            path_initial (str): Path to initial directory.
            library (Library): Bibliography library to search.
            keywords_type (str): Type of keywords being searched.
            keywords_list_list (list[list[str]]): list of keyword lists
                (include patterns, optionally followed by exclude patterns).
            combine_keywords (str): Combined keywords string.
            output_prefix (str): Prefix for output files.
            path_separate (str): Path to separate directory.

        Returns:
            tuple[list[str], dict[str, list[list[str]]], dict[str, int], Library]:
                Tuple containing pandoc md-to-md error messages, per-field data,
                per-field match counts, and the library of still-unmatched entries.
        """
        error_pandoc_md_md, field_data_dict, no_search_library = [], {}, library
        field_number_dict: dict[str, int] = {}

        for field in search_field_list:
            # Nothing left to search — all entries matched an earlier field.
            if len(no_search_library.entries) == 0:
                continue

            # Search; matched entries are removed from the pool for later fields.
            search_library, no_search_library = search_keywords_core(keywords_list_list, no_search_library, field)
            field_number_dict.update({field: len(search_library.entries)})

            # Deepcopy library for every field: restore the full pool so the
            # next field searches all entries again.
            if self.deepcopy_library_for_every_field:
                no_search_library = copy.deepcopy(library)

            # Operate on the search library (deepcopy so the originals stay untouched).
            libraries = self._python_bib.parse_to_multi_standard_library(copy.deepcopy(search_library))
            library_for_abbr, library_for_zotero, library_for_save = libraries

            # Screen preview only — skip all file generation for this field.
            if self.print_on_screen:
                print("".join(self._python_writer.write_to_str(library_for_zotero)))
                continue
            # Skip file generation when any of the three parsed variants is empty.
            if not (library_for_abbr.entries and library_for_zotero.entries and library_for_save.entries):
                continue

            # Initially write tex, bib, and md files.
            data_temp, temp_error_pandoc_md_md = WriteInitialResult(copy.deepcopy(self.options)).main(
                path_initial,
                output_prefix,
                field,
                keywords_type,
                combine_keywords,
                library_for_abbr,
                library_for_zotero,
                library_for_save,
            )

            # Separately write with the method 'a' for '_basic', '_beauty', '_complex'.
            WriteSeparateResult().main(copy.deepcopy(data_temp), field, keywords_type, combine_keywords, path_separate)

            # Save for combined results (deepcopy so later mutation cannot leak in).
            field_data_dict.update({field: copy.deepcopy(data_temp)})
            error_pandoc_md_md.extend(temp_error_pandoc_md_md)

        return error_pandoc_md_md, field_data_dict, field_number_dict, no_search_library
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import shutil
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pyadvtools import (
|
|
8
|
+
IterateCombineExtendDict,
|
|
9
|
+
IterateUpdateDict,
|
|
10
|
+
combine_content_in_list,
|
|
11
|
+
pairwise_combine_in_list,
|
|
12
|
+
read_list,
|
|
13
|
+
sort_int_str,
|
|
14
|
+
standard_path,
|
|
15
|
+
write_list,
|
|
16
|
+
)
|
|
17
|
+
from pybibtexer.bib.bibtexparser import Library
|
|
18
|
+
from pybibtexer.main import PythonRunBib
|
|
19
|
+
|
|
20
|
+
from ...main import BasicInput
|
|
21
|
+
from .search_base import SearchInitialResult
|
|
22
|
+
from .search_writers import WriteAbbrCombinedResults
|
|
23
|
+
from .utils import keywords_type_for_title, switch_keywords_list, switch_keywords_type
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SearchResultsCore(BasicInput):
    """Core class for generating tex, md, html, and pdf from search results.

    Bibliography files are read from ``path_storage``, grouped per entry type
    and year, searched per keywords type and field, and the results are written
    under ``path_output`` (origin/combined) and ``path_separate``.

    Args:
        path_storage (str): Path to storage directory for bibliography files.
        path_output (str): Path to output directory for generated files.
        path_separate (str): Path to separate directory for individual results.
        j_conf_abbr (str): Abbreviation of journal or conference.
        options (dict): Configuration options.

    Attributes:
        path_storage (str): Path to storage directory.
        path_output (str): Path to output directory.
        path_separate (str): Path to separate directory.
        j_conf_abbr (str): Abbreviation of journal or conference.
        is_standard_bib_file_name (bool): Whether the bib file name follows standard format.
        keywords_dict (dict): dictionary of keywords for searching.
        search_field_list (list[str]): Fields to search in (e.g. title, abstract).
        delete_redundant_files (bool): Whether to delete redundant files after processing.
        generate_basic_md (bool): Whether to generate basic markdown files.
        generate_beauty_md (bool): Whether to generate beautiful markdown files.
        generate_complex_md (bool): Whether to generate complex markdown files.
        generate_tex (bool): Whether to generate LaTeX files.
        first_field_second_keywords (bool): Whether to search fields first, then keywords.
        deepcopy_library_for_every_field (bool): Whether to deep copy library for every field.
        deepcopy_library_for_every_keywords (bool): Whether to deep copy library for every keywords.
    """

    def __init__(
        self, path_storage: str, path_output: str, path_separate: str, j_conf_abbr: str, options: dict[str, Any]
    ) -> None:
        """Initialize SearchResultsCore with paths and configuration.

        Args:
            path_storage (str): Path to storage directory for bibliography files.
            path_output (str): Path to output directory for generated files.
            path_separate (str): Path to separate directory for individual results.
            j_conf_abbr (str): Abbreviation of journal or conference.
            options (dict[str, Any]): Configuration options.
        """
        super().__init__(options)
        self.path_storage: str = standard_path(path_storage)
        self.path_output: str = standard_path(path_output)
        self.path_separate: str = standard_path(path_separate)
        self.j_conf_abbr: str = j_conf_abbr

        # for bib
        # Whether the bib file name is standard, such as `TEVC_2023.bib`;
        # only then can file names be filtered by year before reading.
        self.is_standard_bib_file_name: bool = options.get("is_standard_bib_file_name", True)  # TEVC_2023.bib

        # for search: explicit "keywords_dict" overrides the default.
        self.keywords_dict = options.get("default_keywords_dict", {})
        if temp := options.get("keywords_dict", []):
            self.keywords_dict = temp

        # Optionally restrict to a subset of keyword types.
        if keywords_type_list := options.get("keywords_type_list", []):
            self.keywords_dict = {k: v for k, v in self.keywords_dict.items() if k in keywords_type_list}

        # Normalize the keyword-type names.
        self.keywords_dict = {switch_keywords_type(k): v for k, v in self.keywords_dict.items()}

        # Fields to search; explicit "search_field_list" overrides the default.
        self.search_field_list = options.get("default_search_field_list", ["title", "abstract"])
        if temp := options.get("search_field_list", []):
            self.search_field_list = temp

        # for pandoc
        self.delete_redundant_files: bool = options.get("delete_redundant_files", True)

        # for md
        self.generate_basic_md: bool = options.get("generate_basic_md", False)
        self.generate_beauty_md: bool = options.get("generate_beauty_md", False)
        self.generate_complex_md: bool = options.get("generate_complex_md", True)

        # for tex
        self.generate_tex = options.get("generate_tex", False)

        # for search
        self.first_field_second_keywords = options.get("first_field_second_keywords", True)
        self.deepcopy_library_for_every_field = options.get("deepcopy_library_for_every_field", False)
        self.deepcopy_library_for_every_keywords = options.get("deepcopy_library_for_every_keywords", False)

        # for bib
        self._python_bib = PythonRunBib(options)

    def optimize(self, search_year_list: list[str] | None = None) -> dict[str, dict[str, dict[str, dict[str, int]]]]:
        """Optimize search results for given years.

        Args:
            search_year_list (list[str] | None, optional): list of years to
                search. Defaults to None, meaning all years.

        Returns:
            dict[str, dict[str, dict[str, dict[str, int]]]]: Nested dictionary
                mapping entry type -> keywords type -> combined keyword ->
                field -> number of matched entries.
        """
        # None sentinel instead of a mutable default argument (consistent with
        # _obtain_full_files_data); deduplicate and normalize years to str.
        search_year_list = list({str(i) for i in (search_year_list or [])})

        data_list = self._obtain_full_files_data(self.path_storage, "bib", search_year_list)

        entry_type_keyword_type_keyword_field_number_dict = self.optimize_core(data_list, search_year_list)
        return entry_type_keyword_type_keyword_field_number_dict

    def _obtain_full_files_data(
        self, path_storage: str, extension: str, search_year_list: list[str] | None = None
    ) -> list[str]:
        """Obtain data from all files with specified extension in storage path.

        Args:
            path_storage (str): Path to storage directory.
            extension (str): File extension to search for (without the dot).
            search_year_list (list[str] | None, optional): list of years to
                filter file names by. Defaults to None.

        Returns:
            list[str]: Combined content from all matching files.
        """
        if search_year_list is None:
            search_year_list = []

        # Year filtering on file names only works for standard names like TEVC_2023.bib.
        regex = None
        if self.is_standard_bib_file_name and search_year_list:
            regex = re.compile(f"({'|'.join(search_year_list)})")

        file_list = []
        for root, _, files in os.walk(path_storage, topdown=True):
            files = [f for f in files if f.endswith(f".{extension}")]

            if regex:
                files = [f for f in files if regex.search(f)]

            file_list.extend([os.path.join(root, f) for f in files])

        return combine_content_in_list([read_list(f, "r") for f in sort_int_str(file_list)], None)

    def optimize_core(
        self, data_list: list[str], search_year_list: list[str]
    ) -> dict[str, dict[str, dict[str, dict[str, int]]]]:
        """Core optimization logic for processing bibliography data.

        Args:
            data_list (list[str]): list of bibliography data strings.
            search_year_list (list[str]): list of years to search (empty means all).

        Returns:
            dict[str, dict[str, dict[str, dict[str, int]]]]: Nested dictionary
                containing search results (see :meth:`optimize`).
        """
        print("\n" + "*" * 9 + f" Search in {self.j_conf_abbr} " + "*" * 9)

        entry_type_year_volume_number_month_entry_dict = self._python_bib.parse_to_nested_entries_dict(data_list)

        # generate standard bib and output
        entry_type_keyword_type_keyword_field_number_dict: dict[str, dict[str, dict[str, dict[str, int]]]] = {}
        for entry_type in entry_type_year_volume_number_month_entry_dict:
            # obtain search years (newest first)
            year_list = list(entry_type_year_volume_number_month_entry_dict[entry_type].keys())
            if search_year_list:
                year_list = [y for y in year_list if y in search_year_list]
            year_list = sort_int_str(year_list, reverse=True)
            if not year_list:
                print("year_list is empty.")
                continue

            # output prefix, e.g. "TEVC-2020-2023" (oldest-newest)
            output_prefix = "-".join([self.j_conf_abbr, year_list[-1], year_list[0]])

            # generate paths
            p_origin = os.path.join(self.path_output, entry_type, f"{output_prefix}-Origin")
            p_separate = os.path.join(self.path_separate, entry_type)
            p_combine = os.path.join(self.path_output, entry_type, f"{output_prefix}-Combine")

            # obtain library for the selected years
            new_dict = {year: entry_type_year_volume_number_month_entry_dict[entry_type][year] for year in year_list}
            entries = IterateCombineExtendDict().dict_update(new_dict)
            library = Library(entries)

            # search, generate and save
            keyword_type_keyword_field_number_dict = {}
            for keywords_type in self.keywords_dict:
                # Work on a copy so each keywords type starts from an intact library.
                library = copy.deepcopy(library)

                if self.first_field_second_keywords:
                    keyword_field_number_dict = self._optimize_fields_keyword(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                else:
                    keyword_field_number_dict = self._optimize_keywords_field(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                keyword_type_keyword_field_number_dict.update({keywords_type: keyword_field_number_dict})

            # collect results
            entry_type_keyword_type_keyword_field_number_dict.setdefault(entry_type, {}).update(
                keyword_type_keyword_field_number_dict
            )

        return entry_type_keyword_type_keyword_field_number_dict

    def _optimize_fields_keyword(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
        """Optimize search by fields first, then keywords.

        Runs :meth:`core_optimize` once per search field; entries matched in an
        earlier field are excluded from later fields unless
        ``deepcopy_library_for_every_field`` restores the full library.

        Args:
            keywords_type: Type of keywords to search.
            library: Bibliography library to search.
            output_prefix (str): Prefix for output files.
            p_origin (str): Path to origin directory.
            p_separate (str): Path to separate directory.
            p_combine (str): Path to combine directory.

        Returns:
            dict: dictionary containing keyword field numbers.
        """
        no_search_library = library

        keyword_field_number_dict_ = {}
        for field in self.search_field_list:
            keyword_field_number_dict, no_search_library = self.core_optimize(
                [field], keywords_type, no_search_library, output_prefix, p_origin, p_separate, p_combine
            )

            # Restore the full pool so the next field searches all entries again.
            if self.deepcopy_library_for_every_field:
                no_search_library = copy.deepcopy(library)

            keyword_field_number_dict_ = IterateUpdateDict().dict_update(
                keyword_field_number_dict_, keyword_field_number_dict
            )
        return keyword_field_number_dict_

    def _optimize_keywords_field(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
        """Optimize search by keywords first, then fields.

        Runs :meth:`core_optimize` once over all search fields together.

        Args:
            keywords_type: Type of keywords to search.
            library: Bibliography library to search.
            output_prefix (str): Prefix for output files.
            p_origin (str): Path to origin directory.
            p_separate (str): Path to separate directory.
            p_combine (str): Path to combine directory.

        Returns:
            dict: dictionary containing keyword field numbers.
        """
        keyword_field_number_dict, _ = self.core_optimize(
            self.search_field_list, keywords_type, library, output_prefix, p_origin, p_separate, p_combine
        )
        return keyword_field_number_dict

    def core_optimize(
        self,
        search_field_list: list[str],
        keywords_type: str,
        library: Library,
        output_prefix: str,
        p_origin: str,
        p_separate: str,
        p_combine: str,
    ) -> tuple[dict[str, dict[str, int]], Library]:
        """Core optimization method for processing search results.

        For every keywords list of ``keywords_type``, runs the initial search
        and writes, then writes combined results and error reports, and
        optionally deletes redundant output folders.

        Args:
            search_field_list (list[str]): list of fields to search.
            keywords_type (str): Type of keywords to search.
            library (Library): Bibliography library to search.
            output_prefix (str): Prefix for output files.
            p_origin (str): Path to origin directory.
            p_separate (str): Path to separate directory.
            p_combine (str): Path to combine directory.

        Returns:
            tuple[dict[str, dict[str, int]], Library]: Tuple containing the
                per-keyword per-field match counts and the remaining
                (unmatched) library.
        """
        error_pandoc_md_md: list[str] = []
        save_field_data_dict: dict[str, list[list[str]]] = {}
        keyword_field_number_dict: dict[str, dict[str, int]] = {}

        no_search_library = library
        for keywords_list in self.keywords_dict[keywords_type]:
            print(f"{output_prefix}-{keywords_type}-search-{keywords_list}")
            keywords_list_list, combine_keyword = switch_keywords_list(keywords_list)

            # for initial results
            error_md, field_data_dict, field_number_dict, no_search_library = SearchInitialResult(
                copy.deepcopy(self.options)
            ).main(
                search_field_list,
                p_origin,
                no_search_library,
                keywords_type,
                keywords_list_list,
                combine_keyword,
                output_prefix,
                p_separate,
            )

            # Restore the full pool so the next keywords list searches all entries.
            if self.deepcopy_library_for_every_keywords:
                no_search_library = copy.deepcopy(library)

            # collect error parts
            error_pandoc_md_md.extend(error_md)

            # collect data (pairwise-merge with previously collected field data)
            for field in field_data_dict:
                temp = pairwise_combine_in_list(save_field_data_dict.get(field, []), field_data_dict[field], "\n")
                save_field_data_dict.update({field: temp})

            # collect number
            keyword_field_number_dict.update({combine_keyword: field_number_dict})

        kws_type = keywords_type_for_title(keywords_type)
        flag = "-".join(search_field_list)

        # for error parts in pandoc markdown to markdown
        if error_pandoc_md_md:
            error_pandoc_md_md.insert(0, f"# Error in pandoc md to md for {kws_type}\n\n")
            write_list(error_pandoc_md_md, f"{flag}_{output_prefix}_error_pandoc_md_md.md", "a", p_combine)

        # combine part: for combined results
        error_pandoc_md_pdf, error_pandoc_md_html = WriteAbbrCombinedResults(copy.deepcopy(self.options)).main(
            search_field_list, keywords_type, save_field_data_dict, p_combine
        )

        # for error parts in pandoc markdown to pdf
        if error_pandoc_md_pdf:
            error_pandoc_md_pdf.insert(0, f"# Error in pandoc md to pdf for {kws_type}\n\n")
            write_list(error_pandoc_md_pdf, f"{flag}_{output_prefix}_error_pandoc_md_pdf.md", "a", p_combine)

        # for error parts in pandoc markdown to html
        if error_pandoc_md_html:
            error_pandoc_md_html.insert(0, f"# Error in pandoc md to html for {kws_type}\n\n")
            write_list(error_pandoc_md_html, f"{flag}_{output_prefix}_error_pandoc_md_html.md", "a", p_combine)

        # delete redundant files
        if self.delete_redundant_files:
            self.delete_files(keywords_type, p_origin, p_separate, p_combine)

        return keyword_field_number_dict, no_search_library

    def delete_files(self, keywords_type: str, p_origin: str, p_separate: str, p_combine: str) -> None:
        """Delete redundant files after processing.

        Args:
            keywords_type (str): Type of keywords being processed.
            p_origin (str): Path to origin directory.
            p_separate (str): Path to separate directory.
            p_combine (str): Path to combine directory.
        """
        # for initial tex md bib
        if os.path.exists(p_origin):
            shutil.rmtree(p_origin)

        # for separate keywords: drop md flavors that were not requested
        delete_folder_list = []
        if not self.generate_basic_md:
            delete_folder_list.append("basic")
        if not self.generate_beauty_md:
            delete_folder_list.append("beauty")
        if not self.generate_complex_md:
            delete_folder_list.append("complex")

        for d in delete_folder_list:
            for field in self.search_field_list:
                path_delete = os.path.join(p_separate, keywords_type, f"{field}-md-{d}")
                if os.path.exists(path_delete):
                    shutil.rmtree(path_delete)

        # for combine: intermediate "md" always goes; others only if not requested
        delete_folder_list = ["md"]
        if not self.generate_basic_md:
            delete_folder_list.append("md-basic")
        if not self.generate_beauty_md:
            delete_folder_list.append("md-beauty")
        if not self.generate_complex_md:
            delete_folder_list.append("md-complex")
        if not self.generate_tex:
            delete_folder_list.extend(["tex", "tex-subsection"])

        for d in delete_folder_list:
            path_delete = os.path.join(p_combine, d)
            if os.path.exists(path_delete):
                shutil.rmtree(path_delete)