pyeasyphd 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic. Click here for more details.

Files changed (80):
  1. pyeasyphd/.python-version +1 -0
  2. pyeasyphd/Main.sublime-menu +43 -0
  3. pyeasyphd/__init__.py +0 -0
  4. pyeasyphd/bib/__init__.py +1 -0
  5. pyeasyphd/bib/bibtexbase/__init__.py +7 -0
  6. pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
  7. pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
  8. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
  9. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
  10. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
  11. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
  12. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
  13. pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
  14. pyeasyphd/bib/bibtexparser/__init__.py +47 -0
  15. pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
  16. pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
  17. pyeasyphd/bib/bibtexparser/library.py +207 -0
  18. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
  19. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
  20. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
  21. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
  22. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
  23. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
  24. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
  25. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
  26. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
  27. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
  28. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
  29. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
  30. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
  31. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
  32. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
  33. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
  34. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
  35. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
  36. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
  37. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
  38. pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
  39. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
  40. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
  41. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
  42. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
  43. pyeasyphd/bib/bibtexparser/model.py +481 -0
  44. pyeasyphd/bib/bibtexparser/splitter.py +151 -0
  45. pyeasyphd/bib/core/__init__.py +18 -0
  46. pyeasyphd/bib/core/convert_library_to_library.py +31 -0
  47. pyeasyphd/bib/core/convert_library_to_str.py +199 -0
  48. pyeasyphd/bib/core/convert_str_to_library.py +34 -0
  49. pyeasyphd/bib/core/convert_str_to_str.py +27 -0
  50. pyeasyphd/main/__init__.py +17 -0
  51. pyeasyphd/main/basic_input.py +149 -0
  52. pyeasyphd/main/pandoc_md_to.py +361 -0
  53. pyeasyphd/main/python_run_bib.py +73 -0
  54. pyeasyphd/main/python_run_md.py +235 -0
  55. pyeasyphd/main/python_run_tex.py +149 -0
  56. pyeasyphd/main/python_writers.py +212 -0
  57. pyeasyphd/pyeasyphd.py +72 -0
  58. pyeasyphd/pyeasyphd.sublime-settings +235 -0
  59. pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  60. pyeasyphd/tools/__init__.py +30 -0
  61. pyeasyphd/tools/compare/compare_bibs.py +234 -0
  62. pyeasyphd/tools/experiments_base.py +203 -0
  63. pyeasyphd/tools/format_save_bibs.py +178 -0
  64. pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
  65. pyeasyphd/tools/generate/generate_links.py +356 -0
  66. pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
  67. pyeasyphd/tools/replace/replace.py +81 -0
  68. pyeasyphd/tools/search/data.py +318 -0
  69. pyeasyphd/tools/search/search_base.py +118 -0
  70. pyeasyphd/tools/search/search_core.py +326 -0
  71. pyeasyphd/tools/search/search_keywords.py +227 -0
  72. pyeasyphd/tools/search/search_writers.py +288 -0
  73. pyeasyphd/tools/search/utils.py +152 -0
  74. pyeasyphd/tools/spider/process_spider_bib.py +247 -0
  75. pyeasyphd/tools/spider/process_spider_url.py +74 -0
  76. pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
  77. pyeasyphd/utils/utils.py +62 -0
  78. pyeasyphd-0.0.2.dist-info/METADATA +27 -0
  79. pyeasyphd-0.0.2.dist-info/RECORD +80 -0
  80. pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,326 @@
1
import copy
import os
import re
import shutil
from typing import Any, Dict, List, Optional, Tuple

from pyadvtools import (
    IterateCombineExtendDict,
    IterateUpdateDict,
    combine_content_in_list,
    pairwise_combine_in_list,
    read_list,
    sort_int_str,
    standard_path,
    write_list,
)

from ...bib.bibtexparser import Library
from ...main import BasicInput, PythonRunBib
from .search_base import SearchInitialResult
from .search_writers import WriteAbbrCombinedResults
from .utils import keywords_type_for_title, switch_keywords_list, switch_keywords_type
23
+
24
+
25
class SearchResultsCore(BasicInput):
    """Core search driver that produces tex, md, html, and pdf outputs.

    Args:
        path_storage (str): storage path for one abbreviation.
        path_output (str): output path for one abbreviation.
        path_separate (str): separate-results path for one abbreviation.
        j_conf_abbr (str): abbreviation of the journal or conference.
        options (dict): configuration options.

    Attributes:
        path_storage (str): normalized storage path.
        path_output (str): normalized output path.
        path_separate (str): normalized separate-results path.
        j_conf_abbr (str): abbreviation of the journal or conference.
        is_standard_bib_file_name (bool): whether bib file names follow the
            standard `ABBR_YEAR.bib` convention (default True).
        keywords_type_list (List[str]): keyword-type filter (default []).
        keywords_dict (dict): effective keywords mapping (default {}).
        delete_redundant_files (bool): remove intermediate files (default True).
        generate_basic_md (bool): produce basic markdown (default False).
        generate_beauty_md (bool): produce beauty markdown (default False).
        generate_complex_md (bool): produce complex markdown (default True).
        generate_tex (bool): produce tex output (default False).
        first_field_second_keywords (bool): iterate fields before keywords (default True).
        deepcopy_library_for_every_field (bool): reset library per field (default False).
        deepcopy_library_for_every_keywords (bool): reset library per keyword (default False).
    """

    def __init__(
        self, path_storage: str, path_output: str, path_separate: str, j_conf_abbr: str, options: Dict[str, Any]
    ) -> None:
        super().__init__(options)

        # normalized working paths
        self.path_storage: str = standard_path(path_storage)
        self.path_output: str = standard_path(path_output)
        self.path_separate: str = standard_path(path_separate)
        self.j_conf_abbr: str = j_conf_abbr

        # bib: whether file names follow the `ABBR_YEAR.bib` convention, e.g. `TEVC_2023.bib`
        self.is_standard_bib_file_name: bool = options.get("is_standard_bib_file_name", True)

        # search: a non-empty "keywords_dict" option overrides the default keyword set
        self.keywords_dict = options.get("default_keywords_dict", {})
        override_keywords = options.get("keywords_dict", [])
        if override_keywords:
            self.keywords_dict = override_keywords

        # optionally restrict to a subset of keyword types
        keywords_type_list = options.get("keywords_type_list", [])
        if keywords_type_list:
            self.keywords_dict = {k: v for k, v in self.keywords_dict.items() if k in keywords_type_list}

        # normalize keyword-type keys through the project helper
        self.keywords_dict = {switch_keywords_type(k): v for k, v in self.keywords_dict.items()}

        # search: a non-empty "search_field_list" option overrides the default fields
        self.search_field_list = options.get("default_search_field_list", ["title", "abstract"])
        override_fields = options.get("search_field_list", [])
        if override_fields:
            self.search_field_list = override_fields

        # pandoc
        self.delete_redundant_files: bool = options.get("delete_redundant_files", True)

        # which markdown flavours to produce
        self.generate_basic_md: bool = options.get("generate_basic_md", False)
        self.generate_beauty_md: bool = options.get("generate_beauty_md", False)
        self.generate_complex_md: bool = options.get("generate_complex_md", True)

        # tex
        self.generate_tex = options.get("generate_tex", False)

        # search strategy switches
        self.first_field_second_keywords = options.get("first_field_second_keywords", True)
        self.deepcopy_library_for_every_field = options.get("deepcopy_library_for_every_field", False)
        self.deepcopy_library_for_every_keywords = options.get("deepcopy_library_for_every_keywords", False)

        # bib helper
        self._python_bib = PythonRunBib(options)
99
+
100
+ def optimize(self, search_year_list: List[str] = []) -> Dict[str, Dict[str, Dict[str, Dict[str, int]]]]:
101
+ search_year_list = list(set([str(i) for i in search_year_list]))
102
+
103
+ data_list = self._obtain_full_files_data(self.path_storage, "bib", search_year_list)
104
+
105
+ entry_type_keyword_type_keyword_field_number_dict = self.optimize_core(data_list, search_year_list)
106
+ return entry_type_keyword_type_keyword_field_number_dict
107
+
108
+ def _obtain_full_files_data(self, path_storage: str, extension: str, search_year_list: List[str] = []) -> List[str]:
109
+ regex = None
110
+ if self.is_standard_bib_file_name and search_year_list:
111
+ regex = re.compile(f'({"|".join(search_year_list)})')
112
+
113
+ file_list = []
114
+ for root, _, files in os.walk(path_storage, topdown=True):
115
+ files = [f for f in files if f.endswith(f".{extension}")]
116
+
117
+ if regex:
118
+ files = [f for f in files if regex.search(f)]
119
+
120
+ file_list.extend([os.path.join(root, f) for f in files])
121
+
122
+ return combine_content_in_list([read_list(f, "r") for f in sort_int_str(file_list)], None)
123
+
124
    def optimize_core(self, data_list: List[str], search_year_list) -> Dict[str, Dict[str, Dict[str, Dict[str, int]]]]:
        """Parse bib content, then search each entry type / keyword type.

        Args:
            data_list: raw bib file lines (already concatenated).
            search_year_list: years to keep; an empty list keeps all years.

        Returns:
            Mapping ``entry_type -> keywords_type -> keyword -> field -> count``.
        """
        print("\n" + "*" * 9 + f" Search in {self.j_conf_abbr} " + "*" * 9)

        # nested dict keyed entry_type -> year -> volume -> number -> month -> entry
        entry_type_year_volume_number_month_entry_dict = self._python_bib.parse_to_nested_entries_dict(data_list)

        # generate standard bib and output
        entry_type_keyword_type_keyword_field_number_dict: Dict[str, Dict[str, Dict[str, Dict[str, int]]]] = {}
        for entry_type in entry_type_year_volume_number_month_entry_dict:

            # obtain search years
            year_list = list(entry_type_year_volume_number_month_entry_dict[entry_type].keys())
            if search_year_list:
                year_list = [y for y in year_list if y in search_year_list]
            year_list = sort_int_str(year_list, reverse=True)
            if not year_list:
                print("year_list is empty.")
                continue

            # output prefix: ABBR-oldest-newest (year_list is sorted newest first)
            output_prefix = "-".join([self.j_conf_abbr, year_list[-1], year_list[0]])

            # generate paths
            p_origin = os.path.join(self.path_output, entry_type, f"{output_prefix}-Origin")
            p_separate = os.path.join(self.path_separate, entry_type)
            p_combine = os.path.join(self.path_output, entry_type, f"{output_prefix}-Combine")

            # obtain library: flatten the per-year nesting into a flat entry list
            new_dict = {year: entry_type_year_volume_number_month_entry_dict[entry_type][year] for year in year_list}
            entries = IterateCombineExtendDict().dict_update(new_dict)
            library = Library(entries)

            # search, generate and save
            keyword_type_keyword_field_number_dict = {}
            for keywords_type in self.keywords_dict:
                # NOTE(review): this deepcopies the *current* library, which earlier
                # iterations may have narrowed — confirm whether a pristine copy of
                # the per-entry-type library was intended here.
                library = copy.deepcopy(library)

                if self.first_field_second_keywords:
                    # one search pass per field, keywords inside
                    keyword_field_number_dict = self._optimize_fields_keyword(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                else:
                    # one search pass over all fields at once
                    keyword_field_number_dict = self._optimize_keywords_field(
                        keywords_type, library, output_prefix, p_origin, p_separate, p_combine
                    )
                keyword_type_keyword_field_number_dict.update({keywords_type: keyword_field_number_dict})

            # collect results
            entry_type_keyword_type_keyword_field_number_dict.setdefault(entry_type, {}).update(
                keyword_type_keyword_field_number_dict
            )

        return entry_type_keyword_type_keyword_field_number_dict
176
+
177
+ def _optimize_fields_keyword(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
178
+ no_search_library = library
179
+
180
+ keyword_field_number_dict_ = {}
181
+ for field in self.search_field_list:
182
+ keyword_field_number_dict, no_search_library = self.core_optimize(
183
+ [field],
184
+ keywords_type,
185
+ no_search_library,
186
+ output_prefix,
187
+ p_origin,
188
+ p_separate,
189
+ p_combine,
190
+ )
191
+
192
+ if self.deepcopy_library_for_every_field:
193
+ no_search_library = copy.deepcopy(library)
194
+
195
+ temp = keyword_field_number_dict
196
+ keyword_field_number_dict_ = IterateUpdateDict().dict_update(keyword_field_number_dict_, temp)
197
+ return keyword_field_number_dict_
198
+
199
+ def _optimize_keywords_field(self, keywords_type, library, output_prefix, p_origin, p_separate, p_combine):
200
+ no_search_library = library
201
+
202
+ keyword_field_number_dict, no_search_library = self.core_optimize(
203
+ self.search_field_list,
204
+ keywords_type,
205
+ no_search_library,
206
+ output_prefix,
207
+ p_origin,
208
+ p_separate,
209
+ p_combine,
210
+ )
211
+ return keyword_field_number_dict
212
+
213
    def core_optimize(
        self,
        search_field_list: List[str],
        keywords_type,
        library: Library,
        output_prefix: str,
        p_origin: str,
        p_separate: str,
        p_combine: str,
    ) -> Tuple[Dict[str, Dict[str, int]], Library]:
        """Run every keyword group of ``keywords_type`` over ``library``.

        For each keyword group: search, write initial results, and accumulate
        per-field data and match counts. Afterwards, write combined results
        plus any pandoc conversion errors, and optionally delete intermediates.

        Args:
            search_field_list: entry fields to search (e.g. ["title"]).
            keywords_type: key into ``self.keywords_dict``.
            library: the library to search; entries already matched are carried
                forward through ``no_search_library``.
            output_prefix: file-name prefix (``ABBR-first-last`` years).
            p_origin: path for initial raw outputs.
            p_separate: path for per-keyword separate outputs.
            p_combine: path for combined outputs.

        Returns:
            Tuple of (``keyword -> field -> count`` mapping, remaining library).
        """
        error_pandoc_md_md: List[str] = []
        save_field_data_dict: Dict[str, List[List[str]]] = {}
        keyword_field_number_dict: Dict[str, Dict[str, int]] = {}

        no_search_library = library
        for keywords_list in self.keywords_dict[keywords_type]:

            print(f"{output_prefix}-{keywords_type}-search-{keywords_list}")
            keywords_list_list, combine_keyword = switch_keywords_list(keywords_list)

            # for initial results
            error_md, field_data_dict, field_number_dict, no_search_library = SearchInitialResult(
                copy.deepcopy(self.options)
            ).main(
                search_field_list,
                p_origin,
                no_search_library,
                keywords_type,
                keywords_list_list,
                combine_keyword,
                output_prefix,
                p_separate,
            )

            # optionally let every keyword group search the full original library again
            if self.deepcopy_library_for_every_keywords:
                no_search_library = copy.deepcopy(library)

            # collect error parts
            error_pandoc_md_md.extend(error_md)

            # collect data (pairwise-merge new field data into the accumulator)
            for field in field_data_dict:
                temp = pairwise_combine_in_list(save_field_data_dict.get(field, []), field_data_dict[field], "\n")
                save_field_data_dict.update({field: temp})

            # collect number
            keyword_field_number_dict.update({combine_keyword: field_number_dict})

        kws_type = keywords_type_for_title(keywords_type)
        flag = "-".join(search_field_list)

        # for error parts in pandoc markdown to markdown
        if error_pandoc_md_md:
            error_pandoc_md_md.insert(0, f"# Error in pandoc md to md for {kws_type}\n\n")
            write_list(error_pandoc_md_md, rf"{flag}_{output_prefix}_error_pandoc_md_md.md", "a", p_combine)

        # combine part
        # for combined results
        error_pandoc_md_pdf, error_pandoc_md_html = WriteAbbrCombinedResults(copy.deepcopy(self.options)).main(
            search_field_list, keywords_type, save_field_data_dict, p_combine
        )

        # for error parts in pandoc markdown to pdf
        if error_pandoc_md_pdf:
            error_pandoc_md_pdf.insert(0, f"# Error in pandoc md to pdf for {kws_type}\n\n")
            write_list(error_pandoc_md_pdf, rf"{flag}_{output_prefix}_error_pandoc_md_pdf.md", "a", p_combine)

        # for error parts in pandoc markdown to html
        if error_pandoc_md_html:
            error_pandoc_md_html.insert(0, f"# Error in pandoc md to html for {kws_type}\n\n")
            write_list(error_pandoc_md_html, rf"{flag}_{output_prefix}_error_pandoc_md_html.md", "a", p_combine)

        # delete redundant files
        if self.delete_redundant_files:
            self.delete_files(keywords_type, p_origin, p_separate, p_combine)

        return keyword_field_number_dict, no_search_library
290
+
291
+ def delete_files(self, keywords_type: str, p_origin: str, p_separate: str, p_combine: str) -> None:
292
+ """Delete some redundant files."""
293
+ # for initial tex md bib
294
+ if os.path.exists(p_origin):
295
+ shutil.rmtree(p_origin)
296
+
297
+ # for separate keywords
298
+ delete_folder_list = []
299
+ if not self.generate_basic_md:
300
+ delete_folder_list.append("basic")
301
+ if not self.generate_beauty_md:
302
+ delete_folder_list.append("beauty")
303
+ if not self.generate_complex_md:
304
+ delete_folder_list.append("complex")
305
+
306
+ for d in delete_folder_list:
307
+ for field in self.search_field_list:
308
+ path_delete = os.path.join(p_separate, rf"{keywords_type}/{field}-md-{d}")
309
+ if os.path.exists(path_delete):
310
+ shutil.rmtree(path_delete)
311
+
312
+ # for combine
313
+ delete_folder_list = ["md"]
314
+ if not self.generate_basic_md:
315
+ delete_folder_list.append("md-basic")
316
+ if not self.generate_beauty_md:
317
+ delete_folder_list.append("md-beauty")
318
+ if not self.generate_complex_md:
319
+ delete_folder_list.append("md-complex")
320
+ if not self.generate_tex:
321
+ delete_folder_list.extend(["tex", "tex-subsection"])
322
+
323
+ for d in delete_folder_list:
324
+ path_delete = os.path.join(p_combine, f"{d}")
325
+ if os.path.exists(path_delete):
326
+ shutil.rmtree(path_delete)
@@ -0,0 +1,227 @@
1
+ import copy
2
+ import os
3
+ import re
4
+ from typing import Any, Dict, List
5
+
6
+ from pyadvtools import (
7
+ generate_nested_dict,
8
+ read_list,
9
+ standard_path,
10
+ write_list,
11
+ )
12
+
13
+ from ...main import PandocMdTo
14
+ from ...utils.utils import html_head, html_style, html_tail
15
+ from ..experiments_base import generate_standard_publisher_abbr_options_dict
16
+ from .data import obtain_search_keywords
17
+ from .search_core import SearchResultsCore
18
+ from .utils import extract_information, temp_html_style
19
+
20
+
21
class Searchkeywords(object):
    """Search stored journal/conference bibliographies for configured keywords.

    Args:
        path_storage (str): root path holding journals or conferences.
        path_output (str): root path for generated output.
        options (dict): user options, merged over built-in defaults.

    Attributes:
        path_storage (str): normalized storage path.
        path_output (str): normalized output path.
        options (dict): effective options after merging.
        search_year_list (List[str]): years to search; empty means all (default []).
    """

    def __init__(self, path_storage: str, path_output: str, options: Dict[str, Any]) -> None:
        self.path_storage = standard_path(path_storage)
        self.path_output = standard_path(path_output)

        # built-in defaults; trailing comments note the underlying library default
        defaults: Dict[str, Any] = {
            "display_one_line_reference_note": True,  # default is False
            "is_standardize_bib": False,  # default is True
            "choose_abbr_zotero_save": "save",  # default is "save"
            "function_common_again": True,  # default is True
            "function_common_again_abbr": False,  # default is True
            "function_common_again_zotero": False,  # default is True
            "function_common_again_save": False,  # default is True
            "is_sort_entry_fields": True,  # default is False
            "is_sort_entries_by_field_keys": True,  # default is False
            "sort_entries_by_field_keys_reverse": True,  # default is False
            "generate_entry_cite_keys": True,  # default is False
            "default_keywords_dict": obtain_search_keywords(),
            "default_search_field_list": ["title", "abstract"],
        }
        # caller-supplied options win over the defaults
        defaults.update(options)
        self.options = defaults

        self.search_year_list = options.get("search_year_list", [])

        # sibling output roots derived from the main output path
        self._path_separate = self.path_output + "-Separate"
        self._path_statistic = self.path_output + "-Statistics"
        self._path_combine = self.path_output + "-Combine"
64
+
65
+ def run(self) -> None:
66
+ all_dict = {}
67
+ publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(self.path_storage, self.options)
68
+ for publisher in publisher_abbr_dict:
69
+ for abbr in publisher_abbr_dict[publisher]:
70
+ options = publisher_abbr_dict[publisher][abbr]
71
+
72
+ path_storage = os.path.join(self.path_storage, publisher, abbr)
73
+ path_output = os.path.join(self.path_output, publisher, abbr)
74
+ entry_type_keyword_type_keyword_field_number_dict = SearchResultsCore(
75
+ path_storage, path_output, self._path_separate, abbr, options
76
+ ).optimize(copy.deepcopy(self.search_year_list))
77
+
78
+ all_dict.update({abbr: entry_type_keyword_type_keyword_field_number_dict})
79
+
80
+ if not self.options.get("print_on_screen", False):
81
+ extract_information(all_dict, self._path_statistic)
82
+
83
+ print()
84
+ self._generate_bib_html_for_publisher(publisher_abbr_dict, "bib")
85
+ print()
86
+ self._generate_bib_html_for_publisher(publisher_abbr_dict, "html")
87
+ self._generate_link_to_bib_html_for_combine()
88
+
89
+ print()
90
+ self._pandoc_md_to_html_in_path_separate()
91
+ self._generate_link_to_html_bib_for_separate()
92
+
93
+ return None
94
+
95
+ def _extract_files(
96
+ self, publisher_abbr_dict: dict, ext: str = "html"
97
+ ) -> Dict[str, Dict[str, Dict[str, Dict[str, List[str]]]]]:
98
+ data_dict = {}
99
+ for publisher in publisher_abbr_dict:
100
+ for abbr in publisher_abbr_dict[publisher]:
101
+ p = os.path.join(self.path_output, publisher, abbr)
102
+ if not os.path.exists(p):
103
+ continue
104
+
105
+ for entry_type in [f for f in os.listdir(p) if os.path.isdir(os.path.join(p, f))]:
106
+ if not (folders := [f for f in os.listdir(os.path.join(p, entry_type)) if "combine" in f.lower()]):
107
+ continue
108
+
109
+ for root, _, files in os.walk(os.path.join(p, entry_type, folders[0])):
110
+ for file in [f for f in files if f.endswith(ext)]:
111
+ (
112
+ data_dict.setdefault(file, {})
113
+ .setdefault(entry_type, {})
114
+ .setdefault(publisher, {})
115
+ .setdefault(abbr, [])
116
+ .append(os.path.join(root, file))
117
+ )
118
+ return data_dict
119
+
120
    def _generate_bib_html_for_publisher(self, publisher_abbr_dict, ext: str = "html") -> None:
        """Concatenate each abbr's combined files into one per-publisher file.

        For ``ext == "html"`` only the body fragment (from the first ``<h3`` up
        to ``</body>``) of each source file is kept, and a full html document
        (head, styles, publisher heading, tail) is written; other extensions
        are concatenated verbatim.

        Args:
            publisher_abbr_dict: publisher -> abbr -> options mapping.
            ext: file extension to collect ("html" or "bib").
        """
        data_dict = self._extract_files(publisher_abbr_dict, ext)
        for file in data_dict:
            basename = file.split(".")[0]
            for entry_type in data_dict[file]:
                for publisher in data_dict[file][entry_type]:

                    print(f"Generate {ext} for `{publisher}-{entry_type}-{basename}`")
                    data_list = []
                    for abbr in data_dict[file][entry_type][publisher]:
                        for i in range(ll := len(data_dict[file][entry_type][publisher][abbr])):
                            full_file = data_dict[file][entry_type][publisher][abbr][i]
                            temp_data_list = read_list(full_file, "r", None)
                            if ext == "html":
                                # keep only the body fragment between the first <h3 and </body>
                                if mch := re.search(r"(<h3.*)</body>", "".join(temp_data_list), re.DOTALL):
                                    temp_data_list = mch.group(1).splitlines(keepends=True)

                            data_list.extend(temp_data_list)
                            # blank separator line between files of the same abbr
                            if i < (ll - 1):
                                data_list.append("\n")
                        # blank separator line between abbrs
                        data_list.append("\n")

                    p = os.path.join(self._path_combine, entry_type, publisher, ext)
                    if ext == "html":
                        data_list_ = [html_head.format(basename)]
                        data_list_.extend(html_style)
                        data_list_.append(f'<h2 id="{publisher.upper()}">{publisher.upper()}</h2>\n')
                        data_list_.extend(data_list)
                        data_list_.append(html_tail)
                        write_list(data_list_, f"{basename}.{ext}", "w", p, False)

                    else:
                        write_list(data_list, f"{basename}.{ext}", "w", p, False)
        return None
154
+
155
+ def _generate_link_to_bib_html_for_combine(self) -> None:
156
+ nested_dict = generate_nested_dict(self._path_combine)
157
+
158
+ for entry_type in nested_dict:
159
+ data_dict = {}
160
+ for publisher in nested_dict[entry_type]:
161
+ for ext in nested_dict[entry_type][publisher]:
162
+ if ext == "html":
163
+ for file in nested_dict[entry_type][publisher][ext]:
164
+ data_dict.setdefault(publisher, []).append(file)
165
+
166
+ if ext == "bib":
167
+ for file in nested_dict[entry_type][publisher][ext]:
168
+ if not re.search(r"\-zotero", file):
169
+ continue
170
+
171
+ data_dict.setdefault(publisher, []).append(file)
172
+
173
+ data_list = self._html_format(entry_type, data_dict, "Publishers", "combine")
174
+ write_list(data_list, f"{entry_type.lower()}_links.html", "w", self._path_combine, False)
175
+ return None
176
+
177
+ def _pandoc_md_to_html_in_path_separate(self) -> None:
178
+ mds = []
179
+ for root, _, files in os.walk(self._path_separate):
180
+ mds.extend([os.path.join(root, f) for f in files if f.endswith(".md")])
181
+
182
+ for full_md in mds:
183
+ print(f"pandoc md to html for `{full_md.split(self._path_separate)[-1]}`")
184
+ full_html = full_md.replace("-md", "-html").replace(".md", ".html")
185
+ PandocMdTo({}).pandoc_md_to_html(full_md, full_html, None, None, True)
186
+
187
+ def _generate_link_to_html_bib_for_separate(self) -> None:
188
+ for entry_type in (nested_dict := generate_nested_dict(self._path_separate)):
189
+ data_dict = {}
190
+ for keywords_type in nested_dict[entry_type]:
191
+ for ext in nested_dict[entry_type][keywords_type]:
192
+ if not re.search(r"(\-html\-|\-bib\-zotero)", ext):
193
+ continue
194
+
195
+ for file in nested_dict[entry_type][keywords_type][ext]:
196
+ data_dict.setdefault(os.path.basename(file).split(".")[0], []).append(file)
197
+
198
+ data_list = self._html_format(entry_type, data_dict, "Keywords", "separate")
199
+ write_list(data_list, f"{entry_type.lower()}_links.html", "w", self._path_separate, False)
200
+ return None
201
+
202
+ @staticmethod
203
+ def _html_format(entry_type, data_dict, name_flag, index):
204
+ data_list = [html_head.format(f"{entry_type.title()} Links"), temp_html_style]
205
+ data_list.append('\n<table border="1">\n')
206
+ data_list.append(f"<caption>{entry_type.title()} Links</caption>\n")
207
+
208
+ data_list.extend(["<thead>\n", "<tr>\n", f"<th>{name_flag}</th>\n", "</tr>\n", "</thead>\n"])
209
+
210
+ x = '<td><a href="{}" target="_blank">{}</a></td>\n'
211
+ data_list.append("<tbody>\n")
212
+ for name in data_dict:
213
+ data_list.append("<tr>\n")
214
+ data_list.append(f"<td>{name}</td>\n")
215
+
216
+ for f in data_dict[name]:
217
+ if index == "combine":
218
+ data_list.append(x.format(f, f.split("-")[0].split("/")[-1].title() + ":" + f.split(".")[-1]))
219
+ elif index == "separate":
220
+ data_list.append(x.format(f, f.split("/")[-2].split("-")[0].title() + ":" + f.split(".")[-1]))
221
+
222
+ data_list.append("</tr>\n")
223
+ data_list.append("</tbody>\n")
224
+
225
+ data_list.append("</table>\n")
226
+ data_list.append(html_tail)
227
+ return data_list