pyeasyphd 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic. Click here for more details.

Files changed (80) hide show
  1. pyeasyphd/.python-version +1 -0
  2. pyeasyphd/Main.sublime-menu +43 -0
  3. pyeasyphd/__init__.py +0 -0
  4. pyeasyphd/bib/__init__.py +1 -0
  5. pyeasyphd/bib/bibtexbase/__init__.py +7 -0
  6. pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
  7. pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
  8. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
  9. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
  10. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
  11. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
  12. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
  13. pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
  14. pyeasyphd/bib/bibtexparser/__init__.py +47 -0
  15. pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
  16. pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
  17. pyeasyphd/bib/bibtexparser/library.py +207 -0
  18. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
  19. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
  20. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
  21. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
  22. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
  23. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
  24. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
  25. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
  26. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
  27. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
  28. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
  29. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
  30. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
  31. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
  32. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
  33. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
  34. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
  35. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
  36. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
  37. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
  38. pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
  39. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
  40. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
  41. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
  42. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
  43. pyeasyphd/bib/bibtexparser/model.py +481 -0
  44. pyeasyphd/bib/bibtexparser/splitter.py +151 -0
  45. pyeasyphd/bib/core/__init__.py +18 -0
  46. pyeasyphd/bib/core/convert_library_to_library.py +31 -0
  47. pyeasyphd/bib/core/convert_library_to_str.py +199 -0
  48. pyeasyphd/bib/core/convert_str_to_library.py +34 -0
  49. pyeasyphd/bib/core/convert_str_to_str.py +27 -0
  50. pyeasyphd/main/__init__.py +17 -0
  51. pyeasyphd/main/basic_input.py +149 -0
  52. pyeasyphd/main/pandoc_md_to.py +361 -0
  53. pyeasyphd/main/python_run_bib.py +73 -0
  54. pyeasyphd/main/python_run_md.py +235 -0
  55. pyeasyphd/main/python_run_tex.py +149 -0
  56. pyeasyphd/main/python_writers.py +212 -0
  57. pyeasyphd/pyeasyphd.py +72 -0
  58. pyeasyphd/pyeasyphd.sublime-settings +235 -0
  59. pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  60. pyeasyphd/tools/__init__.py +30 -0
  61. pyeasyphd/tools/compare/compare_bibs.py +234 -0
  62. pyeasyphd/tools/experiments_base.py +203 -0
  63. pyeasyphd/tools/format_save_bibs.py +178 -0
  64. pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
  65. pyeasyphd/tools/generate/generate_links.py +356 -0
  66. pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
  67. pyeasyphd/tools/replace/replace.py +81 -0
  68. pyeasyphd/tools/search/data.py +318 -0
  69. pyeasyphd/tools/search/search_base.py +118 -0
  70. pyeasyphd/tools/search/search_core.py +326 -0
  71. pyeasyphd/tools/search/search_keywords.py +227 -0
  72. pyeasyphd/tools/search/search_writers.py +288 -0
  73. pyeasyphd/tools/search/utils.py +152 -0
  74. pyeasyphd/tools/spider/process_spider_bib.py +247 -0
  75. pyeasyphd/tools/spider/process_spider_url.py +74 -0
  76. pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
  77. pyeasyphd/utils/utils.py +62 -0
  78. pyeasyphd-0.0.2.dist-info/METADATA +27 -0
  79. pyeasyphd-0.0.2.dist-info/RECORD +80 -0
  80. pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,288 @@
1
+ import copy
2
+ import os
3
+ from typing import Dict, List, Tuple
4
+
5
+ from pyadvtools import (
6
+ combine_content_in_list,
7
+ read_list,
8
+ write_list,
9
+ )
10
+
11
+ from ...bib.bibtexparser import Library
12
+ from ...main import PandocMdTo, PythonWriters
13
+ from ...tools.search.utils import (
14
+ combine_keywords_for_file_name,
15
+ combine_keywords_for_title,
16
+ keywords_type_for_title,
17
+ )
18
+
19
+
20
class WriteInitialResult(object):
    """Write initial results for single keyword.

    Args:
        options: dict

    Attributes:
        options (dict): options
    """

    def __init__(self, options: dict) -> None:
        self.options = options

        # Headings produced here sit one level below the combined/separate
        # writers, which use "##"/section.
        self._level_title_md = "###"
        self._level_title_tex = "subsection"
        self._pandoc_md_to = PandocMdTo(options)

    def main(
        self,
        path_initial: str,
        output_prefix: str,
        field: str,
        keywords_type: str,
        combine_keywords: str,
        library_for_abbr: "Library",
        library_for_zotero: "Library",
        library_for_save: "Library",
    ) -> Tuple[List[List[str]], List[str]]:
        """Write tex/md/bib outputs for one keyword.

        Returns:
            A pair ``(written_files, errors)``: ``written_files`` holds the full
            path of every written file (one single-element list per file, in
            tex/md/bib-abbr/bib-zotero/bib-save/basic/beauty/complex order) and
            ``errors`` collects pandoc md->md failures.
        """
        error_pandoc_md_md: List[str] = []

        # generate
        cite_keys = [entry.key for entry in library_for_abbr.entries]

        # update options
        _options = copy.deepcopy(self.options)
        _options["keep_entries_by_cite_keys"] = cite_keys
        _python_writer = PythonWriters(_options)

        # generate tex and md data
        data_list_tex, data_list_md, header = self.generate_content_tex_md(
            cite_keys, output_prefix, field, combine_keywords
        )

        # definition
        file_prefix = combine_keywords_for_file_name(combine_keywords)  # the file name prefix

        # write tex, md, and bib files
        data_list = [data_list_tex, data_list_md, library_for_abbr, library_for_zotero, library_for_save]
        mid_list = ["", "", "-abbr", "-zotero", "-save"]
        post_list = ["tex", "md", "bib", "bib", "bib"]
        path_write = os.path.join(path_initial, f"{field}-{keywords_type}")
        for i in range(len(post_list)):
            file_name = f"{file_prefix}{mid_list[i]}.{post_list[i]}"
            _python_writer.write_to_file(data_list[i], file_name, "w", path_write)

        # pandoc md to generate md file
        # bib_for_abbr; os.path.join instead of a hard-coded "/" for portability
        path_bib = os.path.join(path_write, f"{file_prefix}{mid_list[2]}.bib")
        data_list_pandoc_md = self._pandoc_md_to.pandoc_md_to_md(
            path_bib,
            path_write,
            path_write,
            f"{file_prefix}.md",
            f"{file_prefix}-pandoc.md",
        )

        # main part
        # generate some md output data
        data_basic_md: List[str] = []
        data_beauty_md: List[str] = []
        data_complex_md: List[str] = []
        if data_list_pandoc_md:
            data_basic_md, data_beauty_md, data_complex_md = self.generate_basic_beauty_complex_md(
                header, cite_keys, data_list_pandoc_md, library_for_zotero
            )
        else:
            # Bug fix: the generated file is `{file_prefix}-pandoc.md` (hyphen,
            # see pandoc_md_to_md above), so report that name rather than the
            # never-created `{file_prefix}_pandoc.md`.
            error_pandoc_md_md.append(f"- pandoc full false: {file_prefix}-pandoc.md" + "\n")

        # write basic beauty complex md files
        basic_beauty_complex = ["-basic", "-beauty", "-complex"]
        for d, name in zip([data_basic_md, data_beauty_md, data_complex_md], basic_beauty_complex):
            write_list(d, "{}{}.md".format(file_prefix, name), "w", path_write)

        # save all (tex, md, bib) files
        x = [f"{i}.{j}" for i, j in zip(mid_list, post_list)]
        x.extend([f"{i}.md" for i in basic_beauty_complex])
        data_temp = [[os.path.join(path_write, file_prefix + i)] for i in x]
        return data_temp, error_pandoc_md_md

    def generate_basic_beauty_complex_md(
        self, header: str, cite_key_list: List[str], data_list_pandoc_md: List[str], library_for_zotero: "Library"
    ) -> Tuple[List[str], List[str], List[str]]:
        """Split pandoc md output into basic/beauty/complex bodies, one bullet per cite key.

        Returns three line lists; all empty when pandoc produced no usable data.
        """
        data_basic_md, data_beauty_md, data_complex_md = [], [], []

        # library
        _options = copy.deepcopy(self.options)
        _python_writer = PythonWriters(_options)
        key_url_http_bib_dict = _python_writer.output_key_url_http_bib_dict(library_for_zotero)

        key_basic_dict, key_beauty_dict, key_complex_dict = self._pandoc_md_to.generate_key_data_dict(
            data_list_pandoc_md, key_url_http_bib_dict
        )

        if key_basic_dict and key_beauty_dict and key_complex_dict:
            data_basic_md, data_beauty_md, data_complex_md = [header + "\n"], [header + "\n"], [header + "\n"]
            for i in range(length := len(cite_key_list)):
                data_basic_md.extend(self._convert_to_special_list(key_basic_dict.get(cite_key_list[i], [])))
                data_beauty_md.extend(self._convert_to_special_list(key_beauty_dict.get(cite_key_list[i], [])))
                data_complex_md.extend(self._convert_to_special_list(key_complex_dict.get(cite_key_list[i], [])))
                # Blank separator line between items, but not after the last one.
                if i < (length - 1):
                    data_basic_md.append("\n")
                    data_beauty_md.append("\n")
                    data_complex_md.append("\n")
        return data_basic_md, data_beauty_md, data_complex_md

    @staticmethod
    def _convert_to_special_list(data_list: List[str]) -> List[str]:
        """Turn a block of lines into a markdown list item (mutates in place).

        The first line gets a `- ` bullet; each line following a newline-terminated
        line is indented so it stays attached to the same item.
        """
        if len(data_list) > 0:
            data_list[0] = "- " + data_list[0]
            for j in range(len(data_list) - 1):
                if data_list[j][-1] == "\n":
                    data_list[j + 1] = " " + data_list[j + 1]
        return data_list

    def generate_content_tex_md(
        self, cite_key_list: List[str], output_prefix: str, field: str, combine_keywords: str
    ) -> Tuple[List[str], List[str], str]:
        """Generate the tex body, md body, and md header line for the cite keys."""
        c_k_f_t = combine_keywords_for_title(combine_keywords)

        number_references = len(cite_key_list)
        _title = f"{output_prefix} {field} contains {number_references} {c_k_f_t}"

        # tex: \nocite every key, then print the bibliography.
        tex_header = f"\\{self._level_title_tex}" + "{" + _title + "}\n"
        tex_body = ["\\nocite{" + f"{c_k}" + "}\n" for c_k in cite_key_list]
        tex_tail = "\\printbibliography\n\n\\ifx \\clearPage \\undefined \\else \\clearpage \\fi\n"
        data_list_tex = combine_content_in_list([[tex_header], ["\n"], tex_body, ["\n"], [tex_tail]])

        # md: one pandoc citation bullet per key.
        md_header = f"{self._level_title_md}" + " " + _title + "\n"
        md_body = [r"- [@" + f"{c_k}" + "]\n" for c_k in cite_key_list]
        data_list_md = combine_content_in_list([[md_header], ["\n"], md_body])
        return data_list_tex, data_list_md, md_header
161
+
162
+
163
class WriteSeparateResult(object):
    """Append each keyword's generated files into per-field cumulative outputs."""

    def __init__(self) -> None:
        # One heading level above WriteInitialResult ("###"/subsection).
        self._level_title_md = "##"
        self._level_title_tex = "section"

    def main(
        self, data_temp: List[List[str]], field: str, keywords_type: str, combine_keywords: str, path_separate: str
    ) -> None:
        """Append the bib/md files listed in ``data_temp`` under ``path_separate``."""
        title = f"{field.title()} contains {keywords_type_for_title(keywords_type)}"
        file_prefix = combine_keywords_for_file_name(combine_keywords)  # the file name prefix

        mid_list = ["", "", "-abbr", "-zotero", "-save", "-basic", "-beauty", "-complex"]
        post_list = ["tex", "md", "bib", "bib", "bib", "md", "md", "md"]

        # Skip the first two slots (plain tex/md); processing starts at "-abbr".
        # len(data_temp) = len(mid_list) = len(post_list) = 8
        for idx in range(mid_list.index("-abbr"), len(data_temp)):
            suffix, ext = mid_list[idx], post_list[idx]
            target_dir = os.path.join(path_separate, f"{keywords_type}", f"{field}-{ext}{suffix}")
            target_file = os.path.join(target_dir, f"{file_prefix}.{ext}")
            content = read_list(data_temp[idx][0], "r", None)
            if not os.path.isfile(target_file):
                # First append into this file: prepend a field-level heading.
                if ext == "md":
                    content.insert(0, f"{self._level_title_md} {title}\n\n")
                elif ext == "tex":
                    content.insert(0, f"\\{self._level_title_tex}" + "{" + title + "}\n\n")
                else:
                    content.insert(0, "\n")
            write_list(content, target_file, "a", None, False, False)  # Compulsory `a`
        return None
196
+
197
+
198
class WriteAbbrCombinedResults(object):
    """Write combined results for abbr (such as `TEVC`, `PNAS`).

    Args:
        options: dict

    Attributes:
        options (dict): options
        pandoc_md_basic_to_pdf (bool): whether to convert basic md to pdf
        pandoc_md_beauty_to_pdf (bool): whether to convert beauty md to pdf
        pandoc_md_complex_to_pdf (bool): whether to convert complex md to pdf
        pandoc_md_basic_to_html (bool): whether to convert basic md to html
        pandoc_md_beauty_to_html (bool): whether to convert beauty md to html
        pandoc_md_complex_to_html (bool): whether to convert complex md to html

    """

    def __init__(self, options: dict) -> None:
        self.pandoc_md_basic_to_pdf: bool = options.get("pandoc_md_basic_to_pdf", False)
        self.pandoc_md_beauty_to_pdf: bool = options.get("pandoc_md_beauty_to_pdf", False)
        self.pandoc_md_complex_to_pdf: bool = options.get("pandoc_md_complex_to_pdf", False)
        self.pandoc_md_basic_to_html: bool = options.get("pandoc_md_basic_to_html", False)
        self.pandoc_md_beauty_to_html: bool = options.get("pandoc_md_beauty_to_html", False)
        self.pandoc_md_complex_to_html: bool = options.get("pandoc_md_complex_to_html", True)

        self._level_title_md = "##"
        self._level_title_tex = "section"
        self._pandoc_md_to = PandocMdTo(options)

    def main(
        self, search_field_list, keywords_type: str, field_data_dict: Dict[str, List[List[str]]], path_combine: str
    ) -> Tuple[List[str], List[str]]:
        """Combine the per-keyword files of each field, then optionally render pdf/html.

        Returns:
            ``(pdf_errors, html_errors)`` collected from the pandoc conversions.
        """
        path_subsection = os.path.join(path_combine, "tex-subsection")
        path_md = os.path.join(path_combine, "md")
        path_bib = os.path.join(path_combine, "bib")

        mid_list = ["", "", "-abbr", "-zotero", "-save", "-basic", "-beauty", "-complex"]
        post_list = ["tex", "md", "bib", "bib", "bib", "md", "md", "md"]
        path_list = [path_subsection, path_md, path_bib, path_bib, path_bib]
        for i in ["-basic", "-beauty", "-complex"]:
            path_list.append(os.path.join(path_combine, f"md{i}"))
        # len(mid_list) == len(post_list) == len(path_list) == 8

        k_t_f_t = keywords_type_for_title(keywords_type)

        error_pandoc_md_pdf, error_pandoc_md_html = [], []
        for field in search_field_list:
            if not field_data_dict.get(field):
                continue

            # write files
            file_prefix = f"{field}-{keywords_type}"  # the file name prefix
            _title = f"{field.title()} contains {k_t_f_t}"

            for j in range(0, len(post_list)):
                temp = combine_content_in_list([read_list(file, "r") for file in field_data_dict[field][j]], ["\n"])
                if post_list[j] == "md":
                    temp.insert(0, f"{self._level_title_md}" + " " + _title + "\n\n")
                elif post_list[j] == "tex":
                    temp.insert(0, f"\\{self._level_title_tex}" + "{" + _title + "}\n\n")
                write_list(temp, f"{file_prefix}{mid_list[j]}.{post_list[j]}", "w", path_list[j])

            # generate tex pdf html
            # for tex
            self._pandoc_md_to.generate_tex_content(file_prefix, path_subsection, path_bib, path_combine)

            # for pdf
            for i in ["basic", "beauty", "complex"]:
                # getattr instead of eval: same attribute lookup without
                # compiling and executing a dynamically built source string.
                if getattr(self, f"pandoc_md_{i}_to_pdf"):
                    error_flag_pdf = self._pandoc_md_to.pandoc_md_to_pdf(
                        os.path.join(path_combine, f"md-{i}"),
                        f"{file_prefix}-{i}.md",
                        os.path.join(path_combine, f"pdf-{i}"),
                        f"{file_prefix}-{i}.pdf",
                    )
                    if error_flag_pdf:
                        error_pandoc_md_pdf.append(error_flag_pdf)

            # for html
            for i in ["basic", "beauty", "complex"]:
                if getattr(self, f"pandoc_md_{i}_to_html"):
                    error_flag_html = self._pandoc_md_to.pandoc_md_to_html(
                        os.path.join(path_combine, f"md-{i}"),
                        os.path.join(path_combine, f"html-{i}"),
                        f"{file_prefix}-{i}.md",
                        f"{file_prefix}-{i}.html",
                        True
                    )
                    if error_flag_html:
                        error_pandoc_md_html.append(error_flag_html)
        return error_pandoc_md_pdf, error_pandoc_md_html
@@ -0,0 +1,152 @@
1
+ import os
2
+ import re
3
+ from typing import Dict, List, Tuple, Union
4
+
5
+ from pyadvtools import (
6
+ IterateSortDict,
7
+ is_list_contain_list_contain_str,
8
+ is_list_contain_str,
9
+ write_list,
10
+ )
11
+
12
+
13
def switch_keywords_list(xx: Union[List[str], List[List[str]]]) -> Tuple[List[List[str]], str]:
    """Switch keyword.

    Input: ["evolutionary", "algorithm"] or [["evolution"], ["evolutionary"]]
    Output: [["evolutionary", "algorithm"]] or [["evolution"], ["evolutionary"]]
    """
    def _wrap(group: List[str]) -> List[str]:
        # Surround every keyword with regex word boundaries.
        return [rf"\b{x}\b" for x in group]

    groups: List[List[str]] = [[]]
    if is_list_contain_str(xx):
        groups = [_wrap(xx)]
    elif is_list_contain_list_contain_str(xx):
        if len(xx) == 1:
            groups = [_wrap(xx[0])]
        elif len(xx) == 2:
            groups = [_wrap(xx[0]), _wrap(xx[1])]
        else:
            print(f"Not standard keywords: {xx}")
            return groups, ""
    else:
        return groups, ""

    combined = "_and_".join(groups[0])
    if len(groups) == 2:
        combined += "_without_{}".format("_and_".join(groups[1]))

    # Encode regex constructs as single digits so the result can double as a
    # token; combine_keywords_for_title reverses this mapping.
    # ['evol(?:ution|utionary) strateg(?:y|ies)', 'population(?:| |-)based', 'network(?:|s)']
    # '\bevol(?:ution|utionary) strateg(?:y|ies)\b_and_\bpopulation(?:| |-)based\b_and_\bnetwork(?:|s)\b'
    combined = combined.replace(r"\b", "")
    # 'evol(?:ution|utionary) strateg(?:y|ies)_and_population(?:| |-)based_and_network(?:|s)'
    combined = re.sub(r"\(\?:[\w\s\-|]*\) ", "0 ", combined)
    # 'evol0 strateg(?:y|ies)_and_population(?:| |-)based_and_network(?:|s)'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)$", "1", combined)
    # 'evol0 strateg(?:y|ies)_and_population(?:| |-)based_and_network1'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)_", "2_", combined)
    # 'evol0 strateg2_and_population(?:| |-)based_and_network1'
    combined = re.sub(r"\(\?:[\w\s\-|]*\)", "3", combined)
    # 'evol0 strateg2_and_population3based_and_network1'
    combined = combined.replace("/", "4").replace(" ", "5")
    # 'evol05strateg2_and_population3based_and_network1'
    return groups, combined
54
+
55
+
56
def combine_keywords_for_title(combine_keywords: str) -> str:
    """Decode a combined-keywords token back into readable title text."""
    text = combine_keywords.replace("_without_", " without ").replace("_and_", "; ")
    # Digits 0-5 were introduced by switch_keywords_list to encode regex pieces:
    # 0/1/2 vanish, 3 -> "-", 4 -> "/", 5 -> " ".
    return text.translate(str.maketrans({"0": "", "1": "", "2": "", "3": "-", "4": "/", "5": " "}))


def combine_keywords_for_file_name(combine_keywords: str) -> str:
    """Turn the decoded title text into a file-name-safe identifier."""
    name = combine_keywords_for_title(combine_keywords)
    for old, new in (("/", "-"), ("; ", "_and_"), (" ", "_")):
        name = name.replace(old, new)
    return name
74
+
75
+
76
def switch_keywords_type(keywords_type: str) -> str:
    """Normalize a keywords-type label into a file-system friendly token."""
    cleaned = keywords_type.replace("/", "-").replace(" ", "_")
    # Collapse separator runs produced by the substitutions above.
    cleaned = re.sub(r"-+", "-", cleaned)
    return re.sub(r"_+", "_", cleaned).strip()
82
+
83
+
84
def keywords_type_for_title(keywords_type: str) -> str:
    """Turn a keywords-type token back into readable title text."""
    return keywords_type.replace("_", " ").strip()
87
+
88
+
89
def extract_information(old_dict: Dict[str, Dict[str, Dict[str, Dict[str, Dict[str, int]]]]], path_output: str) -> None:
    """Pivot abbr->entry_type->keyword_type->keyword->field counts and write one
    markdown count table per (entry_type, field) under ``path_output``.
    """
    # Re-nest as entry_type -> field -> keyword_type -> keyword -> abbr -> count.
    new_dict: Dict[str, Dict[str, Dict[str, Dict[str, Dict[str, int]]]]] = {}
    for abbr in old_dict:
        for entry_type in old_dict[abbr]:
            for keyword_type in old_dict[abbr][entry_type]:
                for keyword in old_dict[abbr][entry_type][keyword_type]:
                    for field, no in old_dict[abbr][entry_type][keyword_type][keyword].items():
                        (
                            new_dict.setdefault(entry_type, {})
                            .setdefault(field, {})
                            .setdefault(keyword_type, {})
                            .setdefault(keyword, {})
                            .update({abbr: no})
                        )

    new_dict = IterateSortDict(False).dict_update(new_dict)

    for entry_type in new_dict:
        for field in new_dict[entry_type]:
            data_list: List[str] = []
            for keyword_type in new_dict[entry_type][field]:
                for keyword, abbr_counts in new_dict[entry_type][field][keyword_type].items():
                    abbr_list = sorted(abbr_counts)  # sorted(dict) iterates keys directly
                    num_list = [abbr_counts[abbr] for abbr in abbr_list]

                    # Avoid repeating identical header/separator rows; distinct
                    # abbr sets still produce their own header lines.
                    header = f'|Keywords Types|Keywords|{"|".join(abbr_list)}|\n'
                    if header not in data_list:
                        data_list.append(header)

                    separator = f'|-|-|{"|".join(["-"] * len(abbr_list))}|\n'
                    if separator not in data_list:
                        data_list.append(separator)

                    # Separate name to avoid clobbering the loop variable.
                    keyword_name = combine_keywords_for_file_name(keyword)
                    data_list.append(f'|{keyword_type}|{keyword_name}|{"|".join(str(n) for n in num_list)}|\n')

            write_list(data_list, f"{field}-keywords_count.md", "w", os.path.join(path_output, entry_type), False)
128
+
129
+
130
+ temp_html_style = """ <style>
131
+ html {font-size: 19px;}
132
+ body {margin: 0 auto; max-width: 22em;}
133
+ table {
134
+ border-collapse: collapse;
135
+ border: 2px solid rgb(200,200,200);
136
+ letter-spacing: 1px;
137
+ font-size: 0.8rem;
138
+ }
139
+ td, th {
140
+ border: 1px solid rgb(190,190,190);
141
+ padding: 10px 20px;
142
+ }
143
+ td {text-align: center;}
144
+ caption {padding: 12px;}
145
+ </style>
146
+ </head>
147
+ <body>
148
+ """
149
+
150
+
151
+ if __name__ == "__main__":
152
+ pass
@@ -0,0 +1,247 @@
1
+ import copy
2
+ import os
3
+ import re
4
+ import time
5
+
6
+ from pyadvtools import (
7
+ IterateCombineExtendDict,
8
+ iterate_obtain_full_file_names,
9
+ read_list,
10
+ standard_path,
11
+ write_list,
12
+ )
13
+
14
+ from ...bib.bibtexparser.library import Library
15
+ from ...main import PythonRunBib, PythonWriters
16
+ from ..experiments_base import generate_readme
17
+ from ..format_save_bibs import format_entries_for_abbr_zotero_save, generate_statistic_information
18
+
19
# Preprint-server abbreviations that move_spider_bib never archives.
EXCLUDE_ABBR_LIST = ["arxiv", "biorxiv", "ssrn"]


class ProcessSpiderBib(object):
    """Process spider bib.

    Args:
        path_abbr: The path of the abbreviation folder.
        abbr_standard: The standard abbreviation.

    Attributes:
        path_abbr: The path of the abbreviation folder.
        abbr_standard: The standard abbreviation.
    """

    def __init__(self, path_abbr: str, abbr_standard: str) -> None:
        # Expand "~" and environment variables so shorthand paths work.
        self.path_abbr = os.path.expandvars(os.path.expanduser(path_abbr))
        self.abbr_standard = abbr_standard

        # Parse/format options shared by every bib operation in this class.
        self._options = {
            "is_standardize_bib": True,  # default is True
            # Substitutions applied to field values (paired index-wise with
            # substitute_new_list): strip HTML-like tags, collapse repeated
            # spaces, semicolons, and commas.
            "substitute_old_list": [
                r"(<[a-zA-Z\-]+\s*/*\s*>)",
                r'(</[a-zA-Z\-]+>)',
                r'(<[a-zA-Z\-]+ [^\s]+="[^>]+?"\s*/*\s*>)',
                r"([ ]+)",
                r";[; ]*;",
                r",[, ]*,"
            ],
            "substitute_new_list": ["", "", "", " ", ";", ","],
            "is_sort_entry_fields": True,  # default is False
            "is_sort_blocks": True,  # default is False
            "sort_entries_by_field_keys_reverse": False,  # default is True
            "empty_entry_cite_keys": True,
        }

        self._python_bib = PythonRunBib(self._options)

    def format_spider_bib(self, write_bib: bool = False) -> None:
        """Format spider bib."""
        file_list = iterate_obtain_full_file_names(self.path_abbr, ".bib", False)

        if write_bib:
            # Writing run: README.md is rebuilt, so discard the old one first.
            if os.path.exists(readme := os.path.join(self.path_abbr, "README.md")):
                os.remove(readme)

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        for f in file_list:
            print("*" * 5 + f" Format {os.path.basename(f)} " + "*" * 5)

            data_list = read_list(f, "r")

            # standardize
            entry_type_year_volume_number_month_entry_dict = self._python_bib.parse_to_nested_entries_dict(data_list)
            if not write_bib:
                # Dry run: parsing alone surfaces problems; nothing is written.
                continue

            # just for the necessary part
            # Whitespace-insensitive snapshot of the existing README lines.
            old_readme_md = [re.sub(r"[ ]+", "", line) for line in read_list("README.md", "r", self.path_abbr)]
            new_readme_md = []
            new_entry_list = []

            for entry_type in entry_type_year_volume_number_month_entry_dict:
                # presumably dict keys are already lowercase — .get(lower) would
                # return {} otherwise; TODO confirm in parse_to_nested_entries_dict
                new_dict = entry_type_year_volume_number_month_entry_dict.get(entry_type.lower(), {})

                # for README.md
                readme_md = generate_readme(self.abbr_standard, entry_type.lower(), new_dict)
                # Drop the leading 3 lines (heading) when README content already
                # exists, and skip lines already present (whitespace-insensitive).
                readme_md = readme_md[3:] if (old_readme_md or new_readme_md) else readme_md
                readme_md = [line for line in readme_md if re.sub(r"[ ]+", "", line) not in old_readme_md]
                new_readme_md.extend(readme_md)

                # for bib
                entry_list = IterateCombineExtendDict().dict_update(copy.deepcopy(new_dict))
                new_entry_list.extend(entry_list)

            write_list(new_readme_md, "README.md", "a", self.path_abbr, False)
            _python_writer.write_to_file(new_entry_list, f, "w", None, False)
        return None

    def check_spider_bib(self, delete_duplicate_in_bibs: bool = False) -> None:
        """Check bib."""
        # Pair each .bib with the .csv of the same base name.
        bibs_name = iterate_obtain_full_file_names(self.path_abbr, ".bib", False)
        bibs_name = [[f, os.path.basename(f).split(".")[0].strip()] for f in bibs_name]

        urls_name = iterate_obtain_full_file_names(self.path_abbr, ".csv", False)
        urls_name = [[f, os.path.basename(f).split(".")[0].strip()] for f in urls_name]

        url_base_names = [name[-1] for name in urls_name]

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        for name in bibs_name:
            bib_base_name = name[-1]
            if bib_base_name not in url_base_names:
                print(f"{bib_base_name}.csv not in the folder `url`.")
                continue

            full_bib, full_url = name[0], urls_name[url_base_names.index(bib_base_name)][0]

            print("*" * 5 + f" Check {os.path.basename(full_bib)} and {os.path.basename(full_url)} " + "*" * 5)
            bib_list = read_list(full_bib, "r")

            # Check duplicated blocks in bib file
            library = self._python_bib.parse_to_single_standard_library(bib_list)

            # Group entries by DOI when present, else by URL.
            url_bib_dict = {}
            for entry in library.entries:
                doi = entry["doi"] if "doi" in entry else ""
                url_ = entry["url"] if "url" in entry else ""
                url = doi if doi else url_
                url_bib_dict.setdefault(url, []).append(entry)

            duplicate_url, new_entries = [], []
            for url in url_bib_dict:
                if len(url_bib_dict[url]) > 1:
                    duplicate_url.append(url)
                if delete_duplicate_in_bibs:
                    # Keep only the first entry per DOI/URL group.
                    new_entries.append(url_bib_dict[url][0])

            # Delete duplicated blocks in bib file
            if duplicate_url:
                print(f"Duplicates in {full_bib}: {duplicate_url}\n")
            if duplicate_url and delete_duplicate_in_bibs:
                _python_writer.write_to_file(new_entries, full_bib, "w", None, False)
        return None

    def move_spider_bib(self, path_shutil: str) -> None:
        # Move settled entries into the archive, then delete them from the
        # spider folder. Preprint servers are excluded entirely.
        if self.abbr_standard.lower() in EXCLUDE_ABBR_LIST:
            return None

        # Move
        print("*" * 5 + f" Start moving {self.abbr_standard} ... " + "*" * 5)
        path_move = os.path.join(path_shutil, self.abbr_standard)
        entry_type_entry_dict = {}
        # NOTE(review): path_abbr (a path) is passed where other call sites pass
        # a line list — presumably the parser accepts both; confirm.
        library = PythonRunBib({}).parse_to_single_standard_library(self.path_abbr)
        for entry in library.entries:
            entry_type_entry_dict.setdefault(entry.entry_type, []).append(entry)
        for entry_type in entry_type_entry_dict:
            format_entries_for_abbr_zotero_save(
                self.abbr_standard,
                path_move,
                Library(entry_type_entry_dict[entry_type]),
                combine_year_length=1,
                default_year_list=self._default_year_list(entry_type),
                write_flag_bib="a",
                check_bib_exist=False,
                write_flag_readme="a",
                check_md_exist=False,
                options=self._options,
            )
        generate_statistic_information(path_move)
        print("*" * 5 + " Successfully moving ... " + "*" * 5)

        # Delete
        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)
        print("*" * 5 + f" Start deleting {self.abbr_standard} ... " + "*" * 5)
        bibs = iterate_obtain_full_file_names(self.path_abbr, ".bib")
        for bib in bibs:
            # Keep only entries whose year is NOT in the archived range.
            new_entries = []
            library = self._python_bib.parse_to_single_standard_library(read_list(bib, "r"))
            for entry in library.entries:
                year = entry["year"] if "year" in entry else ""
                if year not in self._default_year_list(entry.entry_type):
                    new_entries.append(entry)
            _python_writer.write_to_file(new_entries, bib, "w", None, False, True, True, False, True)
        print("*" * 5 + " Successfully deleting ... " + "*" * 5)

    @staticmethod
    def _default_year_list(entry_type) -> list:
        """Years considered settled (archivable) for the given entry type."""
        year = int(time.strftime("%Y", time.localtime()))
        month = int(time.strftime("%m", time.localtime()))
        # After March, include one more recent year for articles.
        m = 0 if month <= 3 else 1
        if entry_type == "article":
            default_year_list = [str(i) for i in range(1800, year + m - 1)]
        elif entry_type == "inproceedings":
            # Proceedings may carry next year's date, so include year + 1.
            default_year_list = [str(i) for i in range(1800, year + 2)]
        else:
            default_year_list = [str(i) for i in range(1800, year + m - 1)]
        return default_year_list

    def simplify_early_access(self):
        # for IEEE Early Access
        # Remove early-access records that have since appeared in the regular
        # journal folder (spider_j mirrors spider_j_e).
        path_ieee_early_access = self.path_abbr
        path_ieee = path_ieee_early_access.replace("spider_j_e", "spider_j")

        _options = {}
        _options.update(self._options)
        _python_writer = PythonWriters(_options)

        print(f"***** Simplify {self.abbr_standard} *****")
        path_url_ieee_early_access = os.path.join(path_ieee_early_access, "url")
        path_bib_ieee_early_access = os.path.join(path_ieee_early_access, "bib")
        path_url_ieee = os.path.join(path_ieee, "url")

        # for txt urls
        # Drop early-access URLs already present in any published-issue list.
        data_list = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)
        for name in [f for f in os.listdir(path_url_ieee) if f.endswith(".txt")]:
            temp_data_list = read_list(name, "r", path_url_ieee)
            data_list = list(set(data_list).difference(set(temp_data_list)))
        write_list(sorted(data_list), f"{self.abbr_standard}_0.txt", "w", path_url_ieee_early_access, False)

        # for csv urls
        # Keep only csv rows whose URL survived the txt pruning above.
        data_list_csv = read_list(f"{self.abbr_standard}_0.csv", "r", path_url_ieee_early_access)
        data_list_txt = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)
        data_list = list(set(data_list_csv).intersection(set(data_list_txt)))
        write_list(sorted(data_list), f"{self.abbr_standard}_0.csv", "w", path_url_ieee_early_access, False)

        # for bibs
        # Keep only bib entries whose url matches a surviving URL.
        data_list_bib = read_list(f"{self.abbr_standard}_0.bib", "r", path_bib_ieee_early_access)
        data_list_url = read_list(f"{self.abbr_standard}_0.txt", "r", path_url_ieee_early_access)

        entries = []
        library = self._python_bib.parse_to_single_standard_library(data_list_bib)
        for url in data_list_url:
            for entry in library.entries:
                # NOTE(review): raises KeyError if an entry lacks "url" — other
                # methods here guard with `"url" in entry`; confirm intent.
                if standard_path(url) == standard_path(entry["url"]):
                    entries.append(entry)
                    break

        _python_writer.write_to_file(
            entries, f"{self.abbr_standard}_0.bib", "w", path_bib_ieee_early_access, False, True, True, True
        )