pyeasyphd 0.4.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +5 -0
- pyeasyphd/data/templates/csl/apa-no-ampersand.csl +2183 -0
- pyeasyphd/data/templates/csl/apa.csl +2133 -0
- pyeasyphd/data/templates/csl/ieee.csl +512 -0
- pyeasyphd/data/templates/tex/Article.tex +38 -0
- pyeasyphd/data/templates/tex/Article_Header.tex +29 -0
- pyeasyphd/data/templates/tex/Article_Tail.tex +3 -0
- pyeasyphd/data/templates/tex/Beamer_Header.tex +79 -0
- pyeasyphd/data/templates/tex/Beamer_Tail.tex +14 -0
- pyeasyphd/data/templates/tex/Style.tex +240 -0
- pyeasyphd/data/templates/tex/TEVC_Header.tex +52 -0
- pyeasyphd/data/templates/tex/TEVC_Tail.tex +4 -0
- pyeasyphd/data/templates/tex/eisvogel.tex +1064 -0
- pyeasyphd/data/templates/tex/math.tex +201 -0
- pyeasyphd/data/templates/tex/math_commands.tex +677 -0
- pyeasyphd/data/templates/tex/nextaimathmacros.sty +681 -0
- pyeasyphd/main/__init__.py +6 -0
- pyeasyphd/main/basic_input.py +101 -0
- pyeasyphd/main/pandoc_md_to.py +380 -0
- pyeasyphd/main/python_run_md.py +320 -0
- pyeasyphd/main/python_run_tex.py +200 -0
- pyeasyphd/pyeasyphd.py +86 -0
- pyeasyphd/pyeasyphd.sublime-settings +100 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/scripts/__init__.py +34 -0
- pyeasyphd/scripts/_base.py +65 -0
- pyeasyphd/scripts/run_article_md.py +101 -0
- pyeasyphd/scripts/run_article_tex.py +94 -0
- pyeasyphd/scripts/run_beamer_tex.py +84 -0
- pyeasyphd/scripts/run_compare.py +71 -0
- pyeasyphd/scripts/run_format.py +62 -0
- pyeasyphd/scripts/run_generate.py +211 -0
- pyeasyphd/scripts/run_replace.py +34 -0
- pyeasyphd/scripts/run_search.py +251 -0
- pyeasyphd/tools/__init__.py +12 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +181 -0
- pyeasyphd/tools/generate/generate_html.py +166 -0
- pyeasyphd/tools/generate/generate_library.py +203 -0
- pyeasyphd/tools/generate/generate_links.py +400 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +398 -0
- pyeasyphd/tools/search/data.py +282 -0
- pyeasyphd/tools/search/search_base.py +146 -0
- pyeasyphd/tools/search/search_core.py +400 -0
- pyeasyphd/tools/search/search_keywords.py +229 -0
- pyeasyphd/tools/search/search_writers.py +350 -0
- pyeasyphd/tools/search/utils.py +190 -0
- pyeasyphd/utils/utils.py +99 -0
- pyeasyphd-0.4.42.dist-info/METADATA +33 -0
- pyeasyphd-0.4.42.dist-info/RECORD +53 -0
- pyeasyphd-0.4.42.dist-info/WHEEL +4 -0
- pyeasyphd-0.4.42.dist-info/licenses/LICENSE +674 -0
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from pyadvtools import combine_content_in_list, read_list, standard_path, write_list
|
|
7
|
+
from pybibtexer.bib.bibtexparser import Library
|
|
8
|
+
from pybibtexer.main import PythonRunBib, PythonWriters
|
|
9
|
+
|
|
10
|
+
from ..main import BasicInput, PythonRunMd, PythonRunTex
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PyRunBibMdTex(BasicInput):
    """A class for processing BibTeX, Markdown and LaTeX files with various operations.

    This class provides functionality to handle references, figures, and content conversion
    between Markdown and LaTeX formats.
    """

    def __init__(
        self,
        path_output: str,
        tex_md_flag: str = ".md",
        template_name: str = "paper",
        options: dict[str, Any] | None = None,
    ) -> None:
        """Initialize the PyRunBibMdTex instance.

        Args:
            path_output (str): Output directory path for processed files.
            tex_md_flag (str, optional): Flag indicating whether to process as LaTeX (".tex") or Markdown (".md"). Defaults to ".md".
            template_name (str, optional): Template type to use ("paper" or "beamer"). Defaults to "paper".
            options (dict[str, Any], optional): Additional configuration options. Defaults to {}.

        Raises:
            AssertionError: If tex_md_flag is not ".tex" or ".md" or if template_name is not "paper" or "beamer".
        """
        if options is None:
            options = {}

        super().__init__(options)

        # Normalize the flag: prepend a dot and collapse any run of dots, so
        # "md", ".md" and "..md" all become ".md".
        self.tex_md_flag = re.sub(r"\.+", ".", "." + tex_md_flag)
        assert self.tex_md_flag in [".tex", ".md"], f"{tex_md_flag} must be `.tex` or `.md`."
        self.template_name = template_name.lower()
        assert self.template_name in ["paper", "beamer"], f"{template_name} must be `paper` or `beamer`."
        self.path_output = standard_path(path_output)

        # Bib
        # Path to bibliographic data, can be either a directory path or a specific file path
        self.bib_path_or_file = options.get("bib_path_or_file", "")  # input

        # Figures \includegraphics{/path/to/example.png}
        # Path to the figures directory (must be a directory path, not a file)
        self.includegraphics_figs_directory = options.get("includegraphics_figs_directory", "")
        self.shutil_includegraphics_figs = options.get("shutil_includegraphics_figs", True)
        self.includegraphics_figs_in_relative_path = options.get("includegraphics_figs_in_relative_path", True)
        includegraphics_figs_postfixes = options.get("includegraphics_figs_postfixes")
        if includegraphics_figs_postfixes is None:
            includegraphics_figs_postfixes = ["eps", "jpg", "png", "svg", "psd", "raw", "jpeg", "pdf"]
        self.includegraphics_figs_postfixes = includegraphics_figs_postfixes

        # Texs (Texes) \input{/path/to/example.tex}
        self.input_texs_directory = options.get("input_texs_directory", "")
        self.shutil_input_texs = options.get("shutil_input_texs", True)
        self.input_texs_in_relative_path = options.get("input_texs_in_relative_path", True)
        input_texs_postfixes = options.get("input_texs_postfixes")
        if input_texs_postfixes is None:
            input_texs_postfixes = ["tex", "latex"]
        self.input_texs_postfixes = input_texs_postfixes

        # (output) Folder name configurations
        self.fig_folder_name = options.get("fig_folder_name", "figs")  # "" or "figs" or "main"
        self.bib_folder_name = options.get("bib_folder_name", "bibs")  # "" or "bibs" or "main"
        self.md_folder_name = options.get("md_folder_name", "mds")  # "" or "mds" or "main"
        self.tex_folder_name = options.get("tex_folder_name", "texs")  # "" or "texs" or "main"

        # Cleanup options
        self.delete_original_md_in_output_folder = options.get("delete_original_md_in_output_folder", False)
        self.delete_original_tex_in_output_folder = options.get("delete_original_tex_in_output_folder", False)
        self.delete_original_bib_in_output_folder = options.get("delete_original_bib_in_output_folder", False)

        # Configuration options
        self.generate_html = options.get("generate_html", False)
        self.generate_tex = options.get("generate_tex", True)

        # Initialize helper classes
        # NOTE(review): `self.options` is presumably populated by BasicInput.__init__
        # from the `options` dict passed to super() — confirm against BasicInput.
        self._python_bib = PythonRunBib(self.options)
        self._python_writer = PythonWriters(self.options)

        self._python_md = PythonRunMd(self.options)
        self._python_tex = PythonRunTex(self.options)

    def run_files(
        self, file_list_md_tex: list[str], output_prefix: str = "", output_level: str = "next"
    ) -> tuple[list[str], list[str]]:
        """Process a list of Markdown or LaTeX files.

        Files whose extension does not match ``self.tex_md_flag`` are silently skipped;
        the surviving files are read and concatenated before processing.

        Args:
            file_list_md_tex (list[str]): list of input file paths (Markdown or LaTeX).
            output_prefix (str, optional): Prefix for output files. When empty, the base name of the
                first input file is used. Defaults to "".
            output_level (str, optional): Output directory level ("previous", "current", or "next"). Defaults to "next".

        Returns:
            tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
        """
        file_list_md_tex = [f for f in file_list_md_tex if f.endswith(self.tex_md_flag)]
        data_list_list = [read_list(standard_path(f), "r") for f in file_list_md_tex]
        # Nothing readable in any input file -> nothing to do.
        if all(len(data_list) == 0 for data_list in data_list_list):
            return [], []

        file_base_name = os.path.splitext(os.path.basename(file_list_md_tex[0]))[0]
        output_prefix = output_prefix if output_prefix else file_base_name

        # Join the per-file line lists, separating files with a blank line.
        data_list_md_tex = combine_content_in_list(data_list_list, ["\n"])

        content_md, content_tex = self.python_run_bib_md_tex(
            output_prefix, data_list_md_tex, self.bib_path_or_file, output_level
        )
        return content_md, content_tex

    def python_run_bib_md_tex(
        self,
        output_prefix: str,
        data_list_md_tex: list[str],
        original_bib_data: list[str] | str | Library,
        output_level: str = "next",
    ) -> tuple[list[str], list[str]]:
        """Process BibTeX, Markdown and LaTeX content.

        When ``data_list_md_tex`` is empty, a Markdown citation list is synthesized from the
        bibliography entries (one ``- [@key]`` bullet per entry under a level-2 heading).

        Args:
            output_prefix (str): Prefix for output files.
            data_list_md_tex (list[str]): list of content lines (Markdown or LaTeX).
            original_bib_data (list[str] | str | Library): BibTeX data in various formats.
            output_level (str, optional): Output directory level ("previous", "current", or "next").
                Any other value falls back to "current". Defaults to "next".

        Returns:
            tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
        """
        # Basic file names
        output_tex, output_md = output_prefix + ".tex", output_prefix + ".md"

        if len(data_list_md_tex) == 0:
            original_bib_data = self._python_bib.parse_to_single_standard_library(original_bib_data)
            if not original_bib_data.entries:
                return [], []

            # Build a minimal Markdown document that cites every entry.
            data_list_md_tex = []
            for entry in original_bib_data.entries:
                data_list_md_tex.append(f"- [@{entry.key}]\n\n")
            data_list_md_tex.insert(0, f"## {output_prefix} - {len(data_list_md_tex)}\n\n")

        # Determine output path based on level
        if output_level == "previous":
            path_output = os.path.dirname(self.path_output)
        elif output_level == "next":
            path_output = os.path.join(self.path_output, output_prefix)
        elif output_level == "current":
            path_output = self.path_output
        else:
            # Unknown level: default to the configured output directory.
            path_output = self.path_output

        if not os.path.exists(path_output):
            os.makedirs(path_output)
        # Resolved output directory used by all subsequent write/copy/cleanup steps.
        self.path_output_new = standard_path(path_output)

        return self._python_run_bib_md_tex(output_md, output_tex, data_list_md_tex, original_bib_data)

    def _python_run_bib_md_tex(
        self,
        output_md: str,
        output_tex: str,
        data_list_md_tex: list[str],
        original_bib_data: list[str] | str | Library,
    ) -> tuple[list[str], list[str]]:
        """Process BibTeX, Markdown and LaTeX content.

        Pipeline: copy referenced figures and \\input tex files, extract citation keys,
        write bibliography files, convert/write the Markdown or LaTeX output, then
        optionally delete intermediate files.

        Args:
            output_md (str): Output Markdown filename.
            output_tex (str): Output LaTeX filename.
            data_list_md_tex (list[str]): list of content lines (Markdown or LaTeX).
            original_bib_data (list[str] | str | Library): BibTeX data in various formats.

        Returns:
            tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
        """
        # Copy figures if enabled
        if self.shutil_includegraphics_figs:
            figure_names = self.search_subfile_names(data_list_md_tex, self.includegraphics_figs_postfixes)
            self.shutil_copy_files(
                self.includegraphics_figs_directory,
                figure_names,
                self.path_output_new,
                self.fig_folder_name,
                self.includegraphics_figs_in_relative_path,
            )

        # Copy input texs (texes) if enabled
        if self.shutil_input_texs:
            input_tex_names = self.search_subfile_names(data_list_md_tex, self.input_texs_postfixes)
            self.shutil_copy_files(
                self.input_texs_directory,
                input_tex_names,
                self.path_output_new,
                self.tex_folder_name,
                self.input_texs_in_relative_path,
            )

        # Extract citation keys from content
        key_in_md_tex = self.search_cite_keys(data_list_md_tex, self.tex_md_flag)

        # Process bibliography
        full_bib_for_zotero, full_bib_for_abbr, full_bib_for_save = "", "", ""
        if key_in_md_tex:
            # Generate bib contents
            abbr_library, zotero_library, save_library = self._python_bib.parse_to_multi_standard_library(
                original_bib_data, key_in_md_tex
            )

            # Only for existing references: keep keys present in the abbr library,
            # preserving their first-citation order.
            key_in_md_tex = sorted(abbr_library.entries_dict.keys(), key=key_in_md_tex.index)

            # Write bibliography files
            _path_output = os.path.join(self.path_output_new, self.bib_folder_name)
            full_bib_for_abbr, full_bib_for_zotero, full_bib_for_save = (
                self._python_writer.write_multi_library_to_multi_file(
                    _path_output, abbr_library, zotero_library, save_library, key_in_md_tex
                )
            )

        # Process content based on format
        if self.tex_md_flag == ".md":
            # Write original markdown content
            write_list(data_list_md_tex, output_md, "w", os.path.join(self.path_output_new, self.md_folder_name), False)

            # Generate processed content and write to given files
            data_list_md, data_list_tex = self._python_md.special_operate_for_md(
                self.path_output_new,
                data_list_md_tex,
                output_md,
                full_bib_for_abbr,
                full_bib_for_zotero,
                self.template_name,
                self.generate_html,
                self.generate_tex,
            )
        else:
            # Input was already LaTeX; pass it through untouched.
            data_list_md, data_list_tex = [], data_list_md_tex

        # Generate LaTeX output if enabled
        if self.generate_tex:
            self._python_tex.generate_standard_tex_data_list(
                data_list_tex,
                output_tex,
                self.path_output_new,
                self.fig_folder_name,
                self.tex_folder_name,
                self.bib_folder_name,
                os.path.basename(full_bib_for_abbr),
                self.template_name,
            )

        # Cleanup original files if enabled
        if self.delete_original_md_in_output_folder:
            self._cleanup_file(os.path.join(self.path_output_new, self.md_folder_name, output_md))

        if self.delete_original_tex_in_output_folder:
            self._cleanup_file(os.path.join(self.path_output_new, self.tex_folder_name, output_tex))

        if self.delete_original_bib_in_output_folder:
            for file in [full_bib_for_abbr, full_bib_for_zotero, full_bib_for_save]:
                self._cleanup_file(file)

        return data_list_md, data_list_tex

    @staticmethod
    def search_subfile_names(data_list: list[str], postfixes: list[str]) -> list[str]:
        """Search for sub-file names (figures, tex inputs) in content.

        Args:
            data_list (list[str]): list of content lines to search.
            postfixes (list[str]): list of file extensions (without dot) to look for.

        Returns:
            list[str]: list of found filenames, de-duplicated, in first-appearance order.
        """
        # Matches e.g. "my-figure.png" for any of the given extensions, case-insensitively.
        regex = re.compile(rf"[\w\-]+\.(?:{'|'.join(postfixes)})", re.I)
        figure_names = []
        for line in data_list:
            figure_names.extend(regex.findall(line))
        # De-duplicate while preserving first-occurrence order.
        return sorted(set(figure_names), key=figure_names.index)

    @staticmethod
    def shutil_copy_files(
        path_file: str, file_names: list[str], path_output: str, output_folder_name: str, relative_path: bool
    ) -> None:
        """Copy specified files from source directory to output directory.

        Searches for files recursively in the source directory and copies them to
        the output location, preserving either relative paths or using a flat structure.

        Args:
            path_file: Source directory path to search for files.
            file_names: list of filenames to copy.
            path_output: Destination directory path.
            output_folder_name: Name of the subfolder in output directory (used when relative_path=False).
            relative_path: If True, preserves relative path structure; if False, uses flat structure.

        Returns:
            None: Function executes side effects (file copying) but returns nothing.
        """
        # Early return if no files or invalid source path
        if not file_names or not path_file:
            return None

        # Validate source directory exists
        if not os.path.exists(path_file):
            print(f"Source directory does not exist: {path_file}")
            return None

        # Recursively search for matching files
        file_list = []
        for root, _, files in os.walk(path_file, topdown=False):
            for name in files:
                if name in file_names:
                    file_list.append(os.path.join(root, name))

        # Report missing files
        found_files = [os.path.basename(f) for f in file_list]
        not_found = [f for f in file_names if f not in found_files]
        if not_found:
            print(f"Files not found: {', '.join(not_found)}")

        # Copy each found file to destination
        for file_path in file_list:
            if relative_path:
                # Preserve relative path structure.
                # NOTE(review): str.replace substitutes every occurrence of `path_file`
                # in the path, not only the leading prefix — confirm inputs cannot
                # contain the source path as an interior component.
                path_output_file = file_path.replace(path_file, path_output)
            else:
                # Use flat structure in specified folder
                path_output_file = os.path.join(path_output, output_folder_name, os.path.basename(file_path))

            # Create destination directory if needed
            output_dir = os.path.dirname(path_output_file)
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            # Perform file copy
            shutil.copy(file_path, path_output_file)
        return None

    @staticmethod
    def search_cite_keys(data_list: list[str], tex_md_flag: str = ".tex") -> list[str]:
        r"""Extract citation keys from content according to their places.

        Args:
            data_list (list[str]): list of content lines to search.
            tex_md_flag (str, optional): Flag indicating content format (".tex" or ".md"). Defaults to ".tex".

        Returns:
            list[str]: list of found citation keys, de-duplicated, in first-appearance order.

        Note:
            For LaTeX, searches for \\cite, \\citep, \\citet patterns.
            For Markdown, searches for [@key], @key; and ;@key] patterns.
        """
        cite_key_list = []
        if tex_md_flag == ".tex":
            # A \cite{...} group may contain several comma-separated keys; split them out.
            regex_list = [re.compile(r"\\[a-z]*cite[tp]*{\s*([\w\-.,:/\s]*)\s*}")]
            cite_key_list.extend(regex_list[0].findall("".join(data_list)))
            cite_key_list = combine_content_in_list([re.split(",", c) for c in cite_key_list])
        elif tex_md_flag == ".md":
            # Pandoc-style citations: single [@key], leading "@key;" and trailing ";@key]".
            regex_list = [
                re.compile(r"\[@([\w\-.:/]+)\]"),
                re.compile(r"@([\w\-.:/]+)\s*;"),
                re.compile(r";\s*@([\w\-.:/]*)\s*]"),
            ]
            cite_key_list = combine_content_in_list(
                [regex_list[i].findall("".join(data_list)) for i in range(len(regex_list))]
            )
        else:
            # Unknown flag: warn and fall through with no keys found.
            print(f"{tex_md_flag} must be `.tex` or `.md`.")

        cite_key_list = [c.strip() for c in cite_key_list if c.strip()]
        # De-duplicate while preserving first-occurrence order.
        return sorted(set(cite_key_list), key=cite_key_list.index)

    def _cleanup_file(self, file_path: str) -> None:
        """Cleanup files and empty directories.

        Removes the file if it exists, then removes its parent directory when the
        directory is empty (ignoring macOS ".DS_Store") and is not the main output
        directory.

        Args:
            file_path (str): Path to file to be removed.
        """
        if os.path.exists(file_path):
            os.remove(file_path)
            dir_path = os.path.dirname(file_path)
            if dir_path != self.path_output_new:  # Don't remove the main output directory
                if len([f for f in os.listdir(dir_path) if f != ".DS_Store"]) == 0:
                    shutil.rmtree(dir_path)
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def obtain_search_keywords() -> dict[str, Any]:
    """Obtain search keywords dictionary.

    Builds regex-fragment keyword groups for literature searching. Each category maps to a
    list of keyword rules; a rule is either a flat list of patterns that must all match, or
    a pair ``[[include...], [exclude...]]`` (presumably include/exclude patterns — the exact
    rule semantics are interpreted by the search code that consumes this dict).

    Several "words" are deliberately truncated stems (e.g. "transfe", "orde") so a
    substring/regex match covers all inflections.

    Returns:
        dict[str, Any]: dictionary containing categorized search keywords, keyed by
            "EC", "SS", "Multi", "Parallel", "MO", "ML", "ECML", and "Theory".
    """
    _h_ = "(?:| |-)"  # hyphen: matches "", " " or "-" between compound-word parts

    evol = "evol(?:ution|utionary)"  # 'evol(?:ution|utionary|ve|ved|ving)'
    computation = "computation(?:|al)"
    strateg = "strateg(?:y|ies)"
    program = "program(?:|ming)"
    algorithm = "algorithm(?:|s)"
    automat = "automat(?:ed|ion)"
    keywords_ec = [  # evolution computation
        ["simulated annealing"],
        ["taboo search"],
        [f"{evol} {strateg}"],
        ["CMA-ES"],  #
        [f"{evol} {program}"],
        [f"differential {evol}"],
        [f"{evol} {algorithm}"],
        [[evol], [strateg, program, "differential", algorithm]],
        [f"genetic {algorithm}"],
        [f"genetic {program}"],
        [["genetic"], [algorithm, program]],
        ["particle swarm"],
        [["swarm"], ["particle"]],
        ["ant colony"],
        ["bee colony"],
        [["colony"], ["ant", "bee"]],
        [f"memetic {algorithm}"],
        [f"population{_h_}based"],
        ["quality diversity"],
        [evol, algorithm, automat],
        [evol, computation],
    ]

    keywords_ss = [  # search strategy
        ["local search"],
        [["local", "search"], ["local search"]],
        ["local optimization"],
        [["local", "optimization"], ["local optimization"]],
        ["random search"],
        [["random", "search"], ["random search"]],
        ["random optimization"],
        [["random", "optimization"], ["random optimization"]],
        ["global search"],
        [["global", "search"], ["global search"]],
        ["global optimization"],
        [["global", "optimization"], ["global optimization"]],
        ["heuristic search"],
        [["heuristic", "search"], ["heuristic search"]],
        ["heuristic optimization"],
        [["heuristic", "optimization"], ["heuristic optimization"]],
    ]

    nsga = "NSGA(?:|II|-II|III|-III)"
    moea_d = "MOEA/D"
    network = "network(?:|s)"
    uncertain = "uncertain(?:|ty)"
    keywords_multi = [  # multi objective
        [moea_d],
        [nsga],
        [f"multi{_h_}objective optimization"],
        [[f"multi{_h_}objective", "optimization"], [f"multi{_h_}objective optimization"]],
        [[f"multi{_h_}objective"], ["optimization"]],
        [f"multi{_h_}model optimization"],
        [[f"multi{_h_}model", "optimization"], [f"multi{_h_}model optimization"]],
        [[f"multi{_h_}model"], ["optimization"]],
        [f"many{_h_}objective optimization"],
        [[f"many{_h_}objective", "optimization"], [f"many{_h_}objective optimization"]],
        [[f"many{_h_}objective"], ["optimization"]],
        [f"dynamic multi{_h_}objective"],
        [f"dynamic {evol} multi{_h_}objective"],
        [["dynamic", f"multi{_h_}objective"], [f"dynamic multi{_h_}objective", f"dynamic {evol} multi{_h_}objective"]],
        [f"dynamic multi{_h_}model"],
        [["dynamic", f"multi{_h_}model"], [f"dynamic multi{_h_}model"]],
        [f"dynamic many{_h_}objective"],
        [f"dynamic {evol} many{_h_}objective"],
        [["dynamic", f"many{_h_}objective"], [f"dynamic many{_h_}objective", f"dynamic {evol} many{_h_}objective"]],
        ["dynamic", "optimization"],
        ["dynamic", network],
        [["dynamic"], [f"multi{_h_}objective", f"multi{_h_}model", f"many{_h_}objective", "optimization", network]],
        [f"{uncertain} optimization"],
        [[uncertain, "optimization"], [f"{uncertain} optimization"]],
        [[uncertain], ["optimization"]],
        ["pareto optimization"],
        [["pareto", "optimization"], ["pareto optimization"]],
        [["pareto"], ["optimization"]],
    ]

    dimension = "dimension(?:|al)"
    distribut = "distribut(?:ion|ed)"
    keywords_parallel = [  # parallel
        [f"large{_h_}scale"],
        [f"high{_h_}{dimension}"],
        [f"high{_h_}performance"],
        ["parallel", evol],
        ["parallel", algorithm],
        [["parallel"], [evol, algorithm]],
        [distribut, evol],
        [distribut, algorithm],
        [[distribut], [evol, algorithm]],
    ]

    keywords_mo = [  # math optimization
        [f"zero{_h_}orde", "optimization"],
        ["coordinate", "descent"],
        ["gradient", "descent"],
        ["gradient", "stochastic"],
        [["gradient"], ["descent", "stochastic"]],
        ["convex", "optimization"],
        [f"non{_h_}convex", "optimization"],
        [["convex"], [f"non{_h_}convex", "optimization"]],
        [[f"non{_h_}convex"], ["convex", "optimization"]],
        ["stochastic", "optimization"],
        [["stochastic"], ["optimization"]],
        ["gaussian", "distribution"],
    ]

    multi_task = "multi(?:|-)task"
    federa = "federa(?:l|ted)"
    weakly_ = f"weakly{_h_}"
    generat = "generat(?:ive|ion)"
    keywords_ml = [  # machine learning
        ["automated", "machine", "learning"],
        [["machine", "learning"], [automat]],
        ["deep", "learning"],
        [f"semi{_h_}supervised", "learning"],
        [f"self{_h_}supervised", "learning"],
        [f"{weakly_}supervised", "learning"],
        ["unsupervised", "learning"],
        [f"multi{_h_}instance", "learning"],
        ["active", "learning"],
        [
            ["supervised", "learning"],
            [f"semi{_h_}supervised", f"self{_h_}supervised", f"weakly{_h_}supervised", "unsupervised"],
        ],
        ["reinforcement", "learning", f"on{_h_}policy"],
        ["reinforcement", "learning", f"off{_h_}policy"],
        ["reinforcement", "learning", "offline"],
        ["reinforcement", "learning", f"model{_h_}based"],
        ["reinforcement", "learning", "continual"],
        ["reinforcement", "learning", "deep"],
        ["reinforcement", "learning", evol],
        [
            ["reinforcement", "learning"],
            ["offline", f"on{_h_}policy", f"off{_h_}policy", f"model{_h_}based", "deep", "continual", evol],
        ],
        ["policy", "search"],
        [["policy"], ["policy", "search"]],
        [f"q{_h_}learning"],
        ["manifold", "learning"],
        [["manifold"], ["Learning"]],
        [multi_task, "learning"],
        [[multi_task], ["learning"]],
        ["transfe", "learning"],
        [["transfe"], ["Learning"]],
        ["domain", "adaptation"],
        ["domain", "generalization"],
        [f"meta{_h_}learning"],
        [[f"meta{_h_}learning"], ["learning"]],
        [federa, "learning"],
        [[federa], ["learning"]],
        ["ensemble", "learning"],
        [["ensemble"], ["learning"]],
        ["online", "learning"],
        [f"few{_h_}shot", "learning"],
        [[f"few{_h_}shot"], ["learning"]],
        [f"one{_h_}shot", "learning"],
        [[f"one{_h_}shot"], ["learning"]],
        [f"zero{_h_}shot", "learning"],
        [[f"zero{_h_}shot"], ["learning"]],
        ["representation", "learning"],
        [["representation"], ["learning"]],
        ["induction"],
        ["deduction"],
        ["transduction"],
        ["neural", network],
        ["graph", network],
        [[network], ["graph", "neural"]],
        [["graph"], [network, "neural"]],
        ["kernel"],
        ["embedding"],
        ["transformer"],
        ["diffusion", "model"],
        [["diffusion"], ["model"]],
        [generat, "model"],
        [[generat], ["model"]],
        ["large language model"],
        [["large", "language", "model"], ["large language model"]],
    ]

    cluster = "cluster(?:|s|ing)"
    # Fixed typo: previously "date(?:| |-)driven" — the intended term is "data-driven".
    data_driven = "data(?:| |-)driven"
    prove = "prov(?:able|e)"
    predict = "predict(?:|ed|ion)"
    recommend = "recommend(?:ed|ation)"
    markov = "markov(?:|ian)"
    keywords_ec_ml = [  # evolution computation and machine learning
        ["neuro(?:| |-)evolution"],
        ["adaptation"],
        ["bayesian", "optimization"],
        ["bi-level", "optimization"],
        ["bayesian", "inference"],
        ["bayesian", "learning"],
        [["bayesian"], ["optimization", "inference", "learning"]],
        [markov, "decision"],
        [markov, "chain"],
        [[markov], ["decision", "chain"]],
        [prove],
        ["time", "series"],
        [cluster],
        [f"co{_h_}evolution", f"co{_h_}operation"],
        [[f"co{_h_}evolution"], [f"co{_h_}operation"]],
        [[f"co{_h_}operation"], [f"co{_h_}evolution"]],
        [data_driven],
        [predict],
        [recommend, "system"],
        [distribut, "shift"],
    ]

    converg = "converg(?:e|ence|ent|ed|ing)"
    theor = "theor(?:y|etic|etical|etically)"
    analy = "analy(?:ze|sis|zed|zing)"
    bound = "bound(?:|s)"
    run = "run(?:|ning)"
    keywords_theory = [  # theory
        ["drift", "analysis"],
        ["hitting", "time"],
        [evol, converg],
        [evol, "time"],
        [evol, theor],
        [evol, bound],
        [evol, "complexity"],
        ["swarm", converg],
        ["swarm", "time"],
        ["swarm", theor],
        ["swarm", bound],
        ["swarm", "complexity"],
        ["colony", converg],
        ["colony", "time"],
        ["colony", theor],
        ["colony", bound],
        ["colony", "complexity"],
        ["genetic", converg],
        ["genetic", "time"],
        ["genetic", theor],
        ["genetic", bound],
        ["genetic", "complexity"],
        [analy, converg],
        [analy, "time"],
        [analy, theor],
        [analy, bound],
        [analy, "complexity"],
        [computation, "time"],
        [f"{run} time"],
        ["upper", bound],
        ["lower", bound],
        [[converg], [evol, "swarm", "colony", "genetic", analy]],
        [["time"], [evol, "swarm", "colony", "genetic", analy, "hitting", computation, run]],
        [[theor], [evol, "swarm", "colony", "genetic", analy]],
        [[bound], [evol, "swarm", "colony", "genetic", analy, "upper", "lower"]],
        [["complexity"], [evol, "swarm", "colony", "genetic", analy]],
        [[analy], [converg, "time", theor, bound, "complexity"]],
    ]

    keywords_dict = {
        "EC": keywords_ec,
        "SS": keywords_ss,
        "Multi": keywords_multi,
        "Parallel": keywords_parallel,
        "MO": keywords_mo,
        "ML": keywords_ml,
        "ECML": keywords_ec_ml,
        "Theory": keywords_theory,
    }
    return keywords_dict