pyeasyphd 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +0 -0
- pyeasyphd/bib/__init__.py +1 -0
- pyeasyphd/bib/bibtexbase/__init__.py +7 -0
- pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
- pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
- pyeasyphd/bib/bibtexparser/__init__.py +47 -0
- pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
- pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
- pyeasyphd/bib/bibtexparser/library.py +207 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
- pyeasyphd/bib/bibtexparser/model.py +481 -0
- pyeasyphd/bib/bibtexparser/splitter.py +151 -0
- pyeasyphd/bib/core/__init__.py +18 -0
- pyeasyphd/bib/core/convert_library_to_library.py +31 -0
- pyeasyphd/bib/core/convert_library_to_str.py +199 -0
- pyeasyphd/bib/core/convert_str_to_library.py +34 -0
- pyeasyphd/bib/core/convert_str_to_str.py +27 -0
- pyeasyphd/main/__init__.py +17 -0
- pyeasyphd/main/basic_input.py +149 -0
- pyeasyphd/main/pandoc_md_to.py +361 -0
- pyeasyphd/main/python_run_bib.py +73 -0
- pyeasyphd/main/python_run_md.py +235 -0
- pyeasyphd/main/python_run_tex.py +149 -0
- pyeasyphd/main/python_writers.py +212 -0
- pyeasyphd/pyeasyphd.py +72 -0
- pyeasyphd/pyeasyphd.sublime-settings +235 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/tools/__init__.py +30 -0
- pyeasyphd/tools/compare/compare_bibs.py +234 -0
- pyeasyphd/tools/experiments_base.py +203 -0
- pyeasyphd/tools/format_save_bibs.py +178 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
- pyeasyphd/tools/generate/generate_links.py +356 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
- pyeasyphd/tools/replace/replace.py +81 -0
- pyeasyphd/tools/search/data.py +318 -0
- pyeasyphd/tools/search/search_base.py +118 -0
- pyeasyphd/tools/search/search_core.py +326 -0
- pyeasyphd/tools/search/search_keywords.py +227 -0
- pyeasyphd/tools/search/search_writers.py +288 -0
- pyeasyphd/tools/search/utils.py +152 -0
- pyeasyphd/tools/spider/process_spider_bib.py +247 -0
- pyeasyphd/tools/spider/process_spider_url.py +74 -0
- pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
- pyeasyphd/utils/utils.py +62 -0
- pyeasyphd-0.0.2.dist-info/METADATA +27 -0
- pyeasyphd-0.0.2.dist-info/RECORD +80 -0
- pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,326 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
import shutil
|
|
5
|
+
from typing import Any, Dict, List, Tuple
|
|
6
|
+
|
|
7
|
+
from pyadvtools import (
|
|
8
|
+
IterateCombineExtendDict,
|
|
9
|
+
IterateUpdateDict,
|
|
10
|
+
combine_content_in_list,
|
|
11
|
+
pairwise_combine_in_list,
|
|
12
|
+
read_list,
|
|
13
|
+
sort_int_str,
|
|
14
|
+
standard_path,
|
|
15
|
+
write_list,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
from ...bib.bibtexparser import Library
|
|
19
|
+
from ...main import BasicInput, PythonRunBib
|
|
20
|
+
from .search_base import SearchInitialResult
|
|
21
|
+
from .search_writers import WriteAbbrCombinedResults
|
|
22
|
+
from .utils import keywords_type_for_title, switch_keywords_list, switch_keywords_type
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SearchResultsCore(BasicInput):
    """Generate tex, md, html, and pdf.

    Walks the stored ``.bib`` files for one journal/conference abbreviation,
    parses them into entries grouped by entry type and year, searches the
    configured keyword lists over the configured fields, and writes the
    per-keyword ("separate") and combined outputs, collecting hit counts.

    Args:
        path_storage (str): the path of storage `abbr`
        path_output (str): the path of output `abbr`
        path_separate (str): the path of separate `abbr`
        j_conf_abbr (str): the abbreviation of journal or conference
        options (dict): options

    Attributes:
        path_storage (str): the path of storage
        path_output (str): the path of output
        path_separate (str): the path of separate
        j_conf_abbr (str): the abbreviation of journal or conference

        is_standard_bib_file_name (bool = True): whether the bib file name is standard
        keywords_type_list (List[str] = []): keywords type list
        keywords_dict (dict = {}): keywords dict
        delete_redundant_files (bool = True): delete redundant files
        generate_basic_md (bool = False): generate basic md
        generate_beauty_md (bool = False): generate beauty md
        generate_complex_md (bool = True): generate complex md
        generate_tex (bool = False): generate tex
        first_field_second_keywords (bool = True): first field second keywords
        deepcopy_library_for_every_field (bool = False): deepcopy library for every field
        deepcopy_library_for_every_keywords (bool = False): deepcopy library for every keywords
    """

    def __init__(
        self, path_storage: str, path_output: str, path_separate: str, j_conf_abbr: str, options: Dict[str, Any]
    ) -> None:
        super().__init__(options)
        self.path_storage: str = standard_path(path_storage)
        self.path_output: str = standard_path(path_output)
        self.path_separate: str = standard_path(path_separate)
        self.j_conf_abbr: str = j_conf_abbr

        # for bib
        # Whether the bib file name is standard, such as `TEVC_2023.bib`.
        self.is_standard_bib_file_name: bool = options.get("is_standard_bib_file_name", True)  # TEVC_2023.bib

        # for search
        # User-supplied "keywords_dict" (if non-empty) overrides the default.
        self.keywords_dict = options.get("default_keywords_dict", {})
        if temp := options.get("keywords_dict", []):
            self.keywords_dict = temp

        # Optionally restrict to the requested keyword types only.
        if keywords_type_list := options.get("keywords_type_list", []):
            self.keywords_dict = {k: v for k, v in self.keywords_dict.items() if k in keywords_type_list}

        # Normalize the keyword-type keys (see .utils.switch_keywords_type).
        self.keywords_dict = {switch_keywords_type(k): v for k, v in self.keywords_dict.items()}

        # Fields to search within each entry; user-supplied list overrides the default.
        self.search_field_list = options.get("default_search_field_list", ["title", "abstract"])
        if temp := options.get("search_field_list", []):
            self.search_field_list = temp

        # for pandoc
        self.delete_redundant_files: bool = options.get("delete_redundant_files", True)

        # for md
        self.generate_basic_md: bool = options.get("generate_basic_md", False)
        self.generate_beauty_md: bool = options.get("generate_beauty_md", False)
        self.generate_complex_md: bool = options.get("generate_complex_md", True)

        # for tex
        self.generate_tex = options.get("generate_tex", False)

        # for search
        self.first_field_second_keywords = options.get("first_field_second_keywords", True)
        self.deepcopy_library_for_every_field = options.get("deepcopy_library_for_every_field", False)
        self.deepcopy_library_for_every_keywords = options.get("deepcopy_library_for_every_keywords", False)

        # for bib
        self._python_bib = PythonRunBib(options)

    def optimize(self, search_year_list: List[str] = []) -> Dict[str, Dict[str, Dict[str, Dict[str, int]]]]:
        """Read all stored bib files and run the keyword search.

        Args:
            search_year_list: years to restrict the search to; empty means all
                years.  NOTE(review): mutable default (``[]``) — harmless here
                because the parameter is rebound, never mutated in place.

        Returns:
            Nested mapping: entry type -> keywords type -> combined keyword ->
            search field -> hit count.
        """
        # De-duplicate and normalize years to strings (callers may pass ints).
        search_year_list = list(set([str(i) for i in search_year_list]))

        data_list = self._obtain_full_files_data(self.path_storage, "bib", search_year_list)

        entry_type_keyword_type_keyword_field_number_dict = self.optimize_core(data_list, search_year_list)
        return entry_type_keyword_type_keyword_field_number_dict

    def _obtain_full_files_data(self, path_storage: str, extension: str, search_year_list: List[str] = []) -> List[str]:
        """Read and concatenate all ``.{extension}`` files under *path_storage*.

        When bib file names are standard (e.g. ``TEVC_2023.bib``) and years were
        requested, only files whose name contains one of those years are read.
        NOTE(review): mutable default (``[]``) — never mutated in place.
        """
        regex = None
        if self.is_standard_bib_file_name and search_year_list:
            # Match any requested year anywhere in the file name.
            regex = re.compile(f'({"|".join(search_year_list)})')

        file_list = []
        for root, _, files in os.walk(path_storage, topdown=True):
            files = [f for f in files if f.endswith(f".{extension}")]

            if regex:
                files = [f for f in files if regex.search(f)]

            file_list.extend([os.path.join(root, f) for f in files])

        # Sort paths (numeric-aware), read each file, and merge into one list of lines.
        return combine_content_in_list([read_list(f, "r") for f in sort_int_str(file_list)], None)

    def optimize_core(self, data_list: List[str], search_year_list) -> Dict[str, Dict[str, Dict[str, Dict[str, int]]]]:
        """Parse *data_list* into entries and search them per entry type.

        Args:
            data_list: concatenated lines of all bib files to search.
            search_year_list: years to keep (strings); empty keeps all years.

        Returns:
            Nested mapping: entry type -> keywords type -> combined keyword ->
            search field -> hit count.
        """
        print("\n" + "*" * 9 + f" Search in {self.j_conf_abbr} " + "*" * 9)

        entry_type_year_volume_number_month_entry_dict = self._python_bib.parse_to_nested_entries_dict(data_list)

        # generate standard bib and output
        entry_type_keyword_type_keyword_field_number_dict: Dict[str, Dict[str, Dict[str, Dict[str, int]]]] = {}
        for entry_type in entry_type_year_volume_number_month_entry_dict:

            # obtain search years
            year_list = list(entry_type_year_volume_number_month_entry_dict[entry_type].keys())
            if search_year_list:
                year_list = [y for y in year_list if y in search_year_list]
            year_list = sort_int_str(year_list, reverse=True)
            if not year_list:
                print("year_list is empty.")
                continue

            # output prefix: `<abbr>-<oldest year>-<newest year>` (year_list is newest-first)
            output_prefix = "-".join([self.j_conf_abbr, year_list[-1], year_list[0]])

            # generate paths
            p_origin = os.path.join(self.path_output, entry_type, f"{output_prefix}-Origin")
            p_separate = os.path.join(self.path_separate, entry_type)
            p_combine = os.path.join(self.path_output, entry_type, f"{output_prefix}-Combine")

            # obtain library: flatten year -> volume/number/month nesting into one entry list
            new_dict = {year: entry_type_year_volume_number_month_entry_dict[entry_type][year] for year in year_list}
            entries = IterateCombineExtendDict().dict_update(new_dict)
            library = Library(entries)

            # search, generate and save
            keyword_type_keyword_field_number_dict = {}
            for keywords_type in self.keywords_dict:
                # Rebind to a fresh deep copy each pass so searching one keywords
                # type cannot leak mutations into the next pass.
                library = copy.deepcopy(library)

                if self.first_field_second_keywords:
                    # Outer loop over fields, inner over keywords.
                    keyword_field_number_dict = self._optimize_fields_keyword(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                else:
                    # Single pass searching all fields per keyword.
                    keyword_field_number_dict = self._optimize_keywords_field(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                keyword_type_keyword_field_number_dict.update({keywords_type: keyword_field_number_dict})

            # collect results
            entry_type_keyword_type_keyword_field_number_dict.setdefault(entry_type, {}).update(
                keyword_type_keyword_field_number_dict
            )

        return entry_type_keyword_type_keyword_field_number_dict

    def _optimize_fields_keyword(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
        """Search field-by-field: run `core_optimize` once per search field.

        Entries already matched in an earlier field are removed from
        `no_search_library` and thus skipped in later fields, unless
        `deepcopy_library_for_every_field` restores the full library each time.
        """
        no_search_library = library

        keyword_field_number_dict_ = {}
        for field in self.search_field_list:
            keyword_field_number_dict, no_search_library = self.core_optimize(
                [field],
                keywords_type,
                no_search_library,
                output_prefix,
                p_origin,
                p_separate,
                p_combine,
            )

            if self.deepcopy_library_for_every_field:
                # Restart from the full library so every field searches all entries.
                no_search_library = copy.deepcopy(library)

            # Merge this field's counts into the accumulated keyword -> field -> count dict.
            temp = keyword_field_number_dict
            keyword_field_number_dict_ = IterateUpdateDict().dict_update(keyword_field_number_dict_, temp)
        return keyword_field_number_dict_

    def _optimize_keywords_field(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
        """Search all fields in a single `core_optimize` pass (keywords outermost)."""
        no_search_library = library

        keyword_field_number_dict, no_search_library = self.core_optimize(
            self.search_field_list,
            keywords_type,
            no_search_library,
            output_prefix,
            p_origin,
            p_separate,
            p_combine,
        )
        return keyword_field_number_dict

    def core_optimize(
        self,
        search_field_list: List[str],
        keywords_type,
        library: Library,
        output_prefix: str,
        p_origin: str,
        p_separate: str,
        p_combine: str,
    ) -> Tuple[Dict[str, Dict[str, int]], Library]:
        """Search *library* for every keyword list of *keywords_type* and write results.

        For each keyword list: runs `SearchInitialResult.main`, accumulates
        per-field markdown data and hit counts, then writes combined results via
        `WriteAbbrCombinedResults` and appends any pandoc error reports.

        Returns:
            Tuple of (combined keyword -> field -> hit count, the library of
            entries not matched by any keyword — used by the caller to chain
            searches).
        """
        error_pandoc_md_md: List[str] = []
        save_field_data_dict: Dict[str, List[List[str]]] = {}
        keyword_field_number_dict: Dict[str, Dict[str, int]] = {}

        no_search_library = library
        for keywords_list in self.keywords_dict[keywords_type]:

            print(f"{output_prefix}-{keywords_type}-search-{keywords_list}")
            # Split one keyword spec into its variant lists plus a combined label.
            keywords_list_list, combine_keyword = switch_keywords_list(keywords_list)

            # for initial results
            error_md, field_data_dict, field_number_dict, no_search_library = SearchInitialResult(
                copy.deepcopy(self.options)
            ).main(
                search_field_list,
                p_origin,
                no_search_library,
                keywords_type,
                keywords_list_list,
                combine_keyword,
                output_prefix,
                p_separate,
            )

            if self.deepcopy_library_for_every_keywords:
                # Restart from the full library so every keyword searches all entries.
                no_search_library = copy.deepcopy(library)

            # collect error parts
            error_pandoc_md_md.extend(error_md)

            # collect data
            for field in field_data_dict:
                temp = pairwise_combine_in_list(save_field_data_dict.get(field, []), field_data_dict[field], "\n")
                save_field_data_dict.update({field: temp})

            # collect number
            keyword_field_number_dict.update({combine_keyword: field_number_dict})

        kws_type = keywords_type_for_title(keywords_type)
        flag = "-".join(search_field_list)

        # for error parts in pandoc markdown to markdown
        if error_pandoc_md_md:
            error_pandoc_md_md.insert(0, f"# Error in pandoc md to md for {kws_type}\n\n")
            write_list(error_pandoc_md_md, rf"{flag}_{output_prefix}_error_pandoc_md_md.md", "a", p_combine)

        # combine part
        # for combined results
        error_pandoc_md_pdf, error_pandoc_md_html = WriteAbbrCombinedResults(copy.deepcopy(self.options)).main(
            search_field_list, keywords_type, save_field_data_dict, p_combine
        )

        # for error parts in pandoc markdown to pdf
        if error_pandoc_md_pdf:
            error_pandoc_md_pdf.insert(0, f"# Error in pandoc md to pdf for {kws_type}\n\n")
            write_list(error_pandoc_md_pdf, rf"{flag}_{output_prefix}_error_pandoc_md_pdf.md", "a", p_combine)

        # for error parts in pandoc markdown to html
        if error_pandoc_md_html:
            error_pandoc_md_html.insert(0, f"# Error in pandoc md to html for {kws_type}\n\n")
            write_list(error_pandoc_md_html, rf"{flag}_{output_prefix}_error_pandoc_md_html.md", "a", p_combine)

        # delete redundant files
        if self.delete_redundant_files:
            self.delete_files(keywords_type, p_origin, p_separate, p_combine)

        return keyword_field_number_dict, no_search_library

    def delete_files(self, keywords_type: str, p_origin: str, p_separate: str, p_combine: str) -> None:
        """Delete some redundant files.

        Removes the intermediate "Origin" tree, then any per-keyword markdown
        variant folders (basic/beauty/complex) and combined md/tex folders whose
        corresponding ``generate_*`` flag is off.
        """
        # for initial tex md bib
        if os.path.exists(p_origin):
            shutil.rmtree(p_origin)

        # for separate keywords
        delete_folder_list = []
        if not self.generate_basic_md:
            delete_folder_list.append("basic")
        if not self.generate_beauty_md:
            delete_folder_list.append("beauty")
        if not self.generate_complex_md:
            delete_folder_list.append("complex")

        for d in delete_folder_list:
            for field in self.search_field_list:
                path_delete = os.path.join(p_separate, rf"{keywords_type}/{field}-md-{d}")
                if os.path.exists(path_delete):
                    shutil.rmtree(path_delete)

        # for combine
        # The plain "md" folder is always removed; variants follow their flags.
        delete_folder_list = ["md"]
        if not self.generate_basic_md:
            delete_folder_list.append("md-basic")
        if not self.generate_beauty_md:
            delete_folder_list.append("md-beauty")
        if not self.generate_complex_md:
            delete_folder_list.append("md-complex")
        if not self.generate_tex:
            delete_folder_list.extend(["tex", "tex-subsection"])

        for d in delete_folder_list:
            path_delete = os.path.join(p_combine, f"{d}")
            if os.path.exists(path_delete):
                shutil.rmtree(path_delete)
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import os
|
|
3
|
+
import re
|
|
4
|
+
from typing import Any, Dict, List
|
|
5
|
+
|
|
6
|
+
from pyadvtools import (
|
|
7
|
+
generate_nested_dict,
|
|
8
|
+
read_list,
|
|
9
|
+
standard_path,
|
|
10
|
+
write_list,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from ...main import PandocMdTo
|
|
14
|
+
from ...utils.utils import html_head, html_style, html_tail
|
|
15
|
+
from ..experiments_base import generate_standard_publisher_abbr_options_dict
|
|
16
|
+
from .data import obtain_search_keywords
|
|
17
|
+
from .search_core import SearchResultsCore
|
|
18
|
+
from .utils import extract_information, temp_html_style
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Searchkeywords(object):
    """Search.

    Top-level driver: runs `SearchResultsCore` for every publisher/abbreviation
    found under *path_storage*, then aggregates the per-abbreviation outputs
    into combined bib/html files and link-index pages.

    NOTE(review): class name breaks PascalCase (`SearchKeywords`); kept as-is
    because renaming would break existing callers.

    Args:
        path_storage (str): the path of storage journals or conferences
        path_output (str): the path of output journals or conferences
        options (dict): options

    Attributes:
        path_storage (str): the path of storage
        path_output (str): the path of output
        options (dict): options

        search_year_list (List[str] = []): search year list
    """

    def __init__(self, path_storage: str, path_output: str, options: Dict[str, Any]) -> None:
        self.path_storage = standard_path(path_storage)
        self.path_output = standard_path(path_output)

        # Build the defaults first, then let the caller's options override them;
        # the trailing comments record each setting's library-wide default.
        options_ = {}
        options_["display_one_line_reference_note"] = True  # default is False
        options_["is_standardize_bib"] = False  # default is True
        options_["choose_abbr_zotero_save"] = "save"  # default is "save"
        options_["function_common_again"] = True  # default is True
        options_["function_common_again_abbr"] = False  # default is True
        options_["function_common_again_zotero"] = False  # default is True
        options_["function_common_again_save"] = False  # default is True
        options_["is_sort_entry_fields"] = True  # default is False
        options_["is_sort_entries_by_field_keys"] = True  # default is False
        options_["sort_entries_by_field_keys_reverse"] = True  # default is False
        options_["generate_entry_cite_keys"] = True  # default is False

        options_["default_keywords_dict"] = obtain_search_keywords()
        options_["default_search_field_list"] = ["title", "abstract"]
        options_.update(options)
        self.options = options_

        self.search_year_list = options.get("search_year_list", [])
        # Sibling output trees derived from path_output.
        self._path_separate = self.path_output + "-Separate"

        self._path_statistic = self.path_output + "-Statistics"
        self._path_combine = self.path_output + "-Combine"

    def run(self) -> None:
        """Run the full search pipeline for every publisher/abbreviation.

        Steps: per-abbr keyword search, statistics extraction, publisher-level
        bib/html combination, link index for the combined tree, pandoc md->html
        for the separate tree, and its link index.
        """
        all_dict = {}
        publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(self.path_storage, self.options)
        for publisher in publisher_abbr_dict:
            for abbr in publisher_abbr_dict[publisher]:
                options = publisher_abbr_dict[publisher][abbr]

                path_storage = os.path.join(self.path_storage, publisher, abbr)
                path_output = os.path.join(self.path_output, publisher, abbr)
                entry_type_keyword_type_keyword_field_number_dict = SearchResultsCore(
                    path_storage, path_output, self._path_separate, abbr, options
                ).optimize(copy.deepcopy(self.search_year_list))

                all_dict.update({abbr: entry_type_keyword_type_keyword_field_number_dict})

        # Write hit-count statistics unless results are printed on screen instead.
        if not self.options.get("print_on_screen", False):
            extract_information(all_dict, self._path_statistic)

        print()
        self._generate_bib_html_for_publisher(publisher_abbr_dict, "bib")
        print()
        self._generate_bib_html_for_publisher(publisher_abbr_dict, "html")
        self._generate_link_to_bib_html_for_combine()

        print()
        self._pandoc_md_to_html_in_path_separate()
        self._generate_link_to_html_bib_for_separate()

        return None

    def _extract_files(
        self, publisher_abbr_dict: dict, ext: str = "html"
    ) -> Dict[str, Dict[str, Dict[str, Dict[str, List[str]]]]]:
        """Collect result files with extension *ext* from every abbr's "Combine" folder.

        Returns:
            Mapping: file name -> entry type -> publisher -> abbr -> list of
            full paths.
        """
        data_dict = {}
        for publisher in publisher_abbr_dict:
            for abbr in publisher_abbr_dict[publisher]:
                p = os.path.join(self.path_output, publisher, abbr)
                if not os.path.exists(p):
                    continue

                for entry_type in [f for f in os.listdir(p) if os.path.isdir(os.path.join(p, f))]:
                    # Only the first "*combine*" folder per entry type is scanned.
                    if not (folders := [f for f in os.listdir(os.path.join(p, entry_type)) if "combine" in f.lower()]):
                        continue

                    for root, _, files in os.walk(os.path.join(p, entry_type, folders[0])):
                        for file in [f for f in files if f.endswith(ext)]:
                            (
                                data_dict.setdefault(file, {})
                                .setdefault(entry_type, {})
                                .setdefault(publisher, {})
                                .setdefault(abbr, [])
                                .append(os.path.join(root, file))
                            )
        return data_dict

    def _generate_bib_html_for_publisher(self, publisher_abbr_dict, ext: str = "html") -> None:
        """Concatenate each result file across all abbrs of a publisher.

        For ``ext == "html"`` only the payload between the first ``<h3`` and
        ``</body>`` of each source file is kept, and the merged payload is
        re-wrapped with the shared html head/style/tail.
        """
        data_dict = self._extract_files(publisher_abbr_dict, ext)
        for file in data_dict:
            basename = file.split(".")[0]
            for entry_type in data_dict[file]:
                for publisher in data_dict[file][entry_type]:

                    print(f"Generate {ext} for `{publisher}-{entry_type}-{basename}`")
                    data_list = []
                    for abbr in data_dict[file][entry_type][publisher]:
                        for i in range(ll := len(data_dict[file][entry_type][publisher][abbr])):
                            full_file = data_dict[file][entry_type][publisher][abbr][i]
                            temp_data_list = read_list(full_file, "r", None)
                            if ext == "html":
                                # Strip each file's html envelope; keep the body content only.
                                if mch := re.search(r"(<h3.*)</body>", "".join(temp_data_list), re.DOTALL):
                                    temp_data_list = mch.group(1).splitlines(keepends=True)

                            data_list.extend(temp_data_list)
                            if i < (ll - 1):
                                data_list.append("\n")
                        data_list.append("\n")

                    p = os.path.join(self._path_combine, entry_type, publisher, ext)
                    if ext == "html":
                        data_list_ = [html_head.format(basename)]
                        data_list_.extend(html_style)
                        data_list_.append(f'<h2 id="{publisher.upper()}">{publisher.upper()}</h2>\n')
                        data_list_.extend(data_list)
                        data_list_.append(html_tail)
                        write_list(data_list_, f"{basename}.{ext}", "w", p, False)

                    else:
                        write_list(data_list, f"{basename}.{ext}", "w", p, False)
        return None

    def _generate_link_to_bib_html_for_combine(self) -> None:
        """Write one `<entry_type>_links.html` index per entry type in the combined tree.

        Links every html file plus only the ``*-zotero*`` bib files, grouped by
        publisher.
        """
        nested_dict = generate_nested_dict(self._path_combine)

        for entry_type in nested_dict:
            data_dict = {}
            for publisher in nested_dict[entry_type]:
                for ext in nested_dict[entry_type][publisher]:
                    if ext == "html":
                        for file in nested_dict[entry_type][publisher][ext]:
                            data_dict.setdefault(publisher, []).append(file)

                    if ext == "bib":
                        for file in nested_dict[entry_type][publisher][ext]:
                            # Only the zotero-flavoured bib files are linked.
                            if not re.search(r"\-zotero", file):
                                continue

                            data_dict.setdefault(publisher, []).append(file)

            data_list = self._html_format(entry_type, data_dict, "Publishers", "combine")
            write_list(data_list, f"{entry_type.lower()}_links.html", "w", self._path_combine, False)
        return None

    def _pandoc_md_to_html_in_path_separate(self) -> None:
        """Convert every markdown file under the separate tree to html via pandoc.

        The html twin lives in the sibling ``*-html*`` folder (path derived by
        string replacement of ``-md`` / ``.md``).
        """
        mds = []
        for root, _, files in os.walk(self._path_separate):
            mds.extend([os.path.join(root, f) for f in files if f.endswith(".md")])

        for full_md in mds:
            print(f"pandoc md to html for `{full_md.split(self._path_separate)[-1]}`")
            full_html = full_md.replace("-md", "-html").replace(".md", ".html")
            PandocMdTo({}).pandoc_md_to_html(full_md, full_html, None, None, True)

    def _generate_link_to_html_bib_for_separate(self) -> None:
        """Write one `<entry_type>_links.html` index per entry type in the separate tree.

        Links files from folders matching ``-html-`` or ``-bib-zotero``, grouped
        by file basename (i.e. by keyword).
        """
        for entry_type in (nested_dict := generate_nested_dict(self._path_separate)):
            data_dict = {}
            for keywords_type in nested_dict[entry_type]:
                for ext in nested_dict[entry_type][keywords_type]:
                    if not re.search(r"(\-html\-|\-bib\-zotero)", ext):
                        continue

                    for file in nested_dict[entry_type][keywords_type][ext]:
                        data_dict.setdefault(os.path.basename(file).split(".")[0], []).append(file)

            data_list = self._html_format(entry_type, data_dict, "Keywords", "separate")
            write_list(data_list, f"{entry_type.lower()}_links.html", "w", self._path_separate, False)
        return None

    @staticmethod
    def _html_format(entry_type, data_dict, name_flag, index):
        """Render *data_dict* (name -> file paths) as an html link table.

        Args:
            entry_type: used for the page title and caption.
            data_dict: row label -> list of file paths to link.
            name_flag: header text of the first column ("Publishers"/"Keywords").
            index: "combine" or "separate"; selects how the link label is
                derived from the file path.

        Returns:
            List of html lines (head, table, tail) ready for `write_list`.
        """
        data_list = [html_head.format(f"{entry_type.title()} Links"), temp_html_style]
        data_list.append('\n<table border="1">\n')
        data_list.append(f"<caption>{entry_type.title()} Links</caption>\n")

        data_list.extend(["<thead>\n", "<tr>\n", f"<th>{name_flag}</th>\n", "</tr>\n", "</thead>\n"])

        x = '<td><a href="{}" target="_blank">{}</a></td>\n'
        data_list.append("<tbody>\n")
        for name in data_dict:
            data_list.append("<tr>\n")
            data_list.append(f"<td>{name}</td>\n")

            for f in data_dict[name]:
                # Link label: `<Folder or file stem>:<extension>` derived from the path.
                if index == "combine":
                    data_list.append(x.format(f, f.split("-")[0].split("/")[-1].title() + ":" + f.split(".")[-1]))
                elif index == "separate":
                    data_list.append(x.format(f, f.split("/")[-2].split("-")[0].title() + ":" + f.split(".")[-1]))

            data_list.append("</tr>\n")
        data_list.append("</tbody>\n")

        data_list.append("</table>\n")
        data_list.append(html_tail)
        return data_list