pyeasyphd 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +0 -0
- pyeasyphd/bib/__init__.py +1 -0
- pyeasyphd/bib/bibtexbase/__init__.py +7 -0
- pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
- pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
- pyeasyphd/bib/bibtexparser/__init__.py +47 -0
- pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
- pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
- pyeasyphd/bib/bibtexparser/library.py +207 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
- pyeasyphd/bib/bibtexparser/model.py +481 -0
- pyeasyphd/bib/bibtexparser/splitter.py +151 -0
- pyeasyphd/bib/core/__init__.py +18 -0
- pyeasyphd/bib/core/convert_library_to_library.py +31 -0
- pyeasyphd/bib/core/convert_library_to_str.py +199 -0
- pyeasyphd/bib/core/convert_str_to_library.py +34 -0
- pyeasyphd/bib/core/convert_str_to_str.py +27 -0
- pyeasyphd/main/__init__.py +17 -0
- pyeasyphd/main/basic_input.py +149 -0
- pyeasyphd/main/pandoc_md_to.py +361 -0
- pyeasyphd/main/python_run_bib.py +73 -0
- pyeasyphd/main/python_run_md.py +235 -0
- pyeasyphd/main/python_run_tex.py +149 -0
- pyeasyphd/main/python_writers.py +212 -0
- pyeasyphd/pyeasyphd.py +72 -0
- pyeasyphd/pyeasyphd.sublime-settings +235 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/tools/__init__.py +30 -0
- pyeasyphd/tools/compare/compare_bibs.py +234 -0
- pyeasyphd/tools/experiments_base.py +203 -0
- pyeasyphd/tools/format_save_bibs.py +178 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
- pyeasyphd/tools/generate/generate_links.py +356 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
- pyeasyphd/tools/replace/replace.py +81 -0
- pyeasyphd/tools/search/data.py +318 -0
- pyeasyphd/tools/search/search_base.py +118 -0
- pyeasyphd/tools/search/search_core.py +326 -0
- pyeasyphd/tools/search/search_keywords.py +227 -0
- pyeasyphd/tools/search/search_writers.py +288 -0
- pyeasyphd/tools/search/utils.py +152 -0
- pyeasyphd/tools/spider/process_spider_bib.py +247 -0
- pyeasyphd/tools/spider/process_spider_url.py +74 -0
- pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
- pyeasyphd/utils/utils.py +62 -0
- pyeasyphd-0.0.2.dist-info/METADATA +27 -0
- pyeasyphd-0.0.2.dist-info/RECORD +80 -0
- pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,378 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
import shutil
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
5
|
+
|
|
6
|
+
from pyadvtools import (
|
|
7
|
+
combine_content_in_list,
|
|
8
|
+
read_list,
|
|
9
|
+
standard_path,
|
|
10
|
+
write_list,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
from ..bib.bibtexparser import Library
|
|
14
|
+
from ..main import BasicInput, PythonRunBib, PythonRunMd, PythonRunTex, PythonWriters
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PyRunBibMdTex(BasicInput):
    """A class for processing BibTeX, Markdown and LaTeX files with various operations.

    This class provides functionality to handle references, figures, and content conversion
    between Markdown and LaTeX formats.

    Notes
    -----
    ``self.options``, ``self.path_bibs`` and ``self.path_figures`` are read by the methods
    below but are not assigned in this class; presumably they are provided by
    ``BasicInput`` -- TODO confirm.
    """

    def __init__(
        self,
        path_output: str,
        tex_md_flag: str = ".md",
        template_name: str = "paper",
        options: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Initialize the PyRunBibMdTex instance.

        Parameters
        ----------
        path_output : str
            Output directory path for processed files
        tex_md_flag : str, optional
            Flag indicating whether to process as LaTeX (".tex") or Markdown (".md"),
            by default ".md". A missing or repeated leading dot is normalised.
        template_name : str, optional
            Template type to use ("paper" or "beamer"), by default "paper"
        options : Optional[Dict[str, Any]], optional
            Additional configuration options, by default None (treated as an empty dict)

        Raises
        ------
        AssertionError
            If tex_md_flag is not ".tex" or ".md"
            If template_name is not "paper" or "beamer"
        """
        # Use a fresh dict per call: a literal `{}` default would be shared by every instance.
        options = {} if options is None else options
        super().__init__(options)

        # "md", ".md" and "..md" all collapse to ".md".
        self.tex_md_flag = re.sub(r"\.+", ".", "." + tex_md_flag)
        # Raise explicitly (same exception type as before) so the check survives `python -O`.
        if self.tex_md_flag not in (".tex", ".md"):
            raise AssertionError(f"{tex_md_flag} must be `.tex` or `.md`.")
        self.template_name = template_name.lower()
        if self.template_name not in ("paper", "beamer"):
            raise AssertionError(f"{template_name} must be `paper` or `beamer`.")
        self.path_output = standard_path(path_output)

        # Configuration options
        self.generate_html = options.get("generate_html", False)
        self.generate_tex = options.get("generate_tex", True)
        self.shutil_figures = options.get("shutil_figures", True)

        # Folder name configurations
        self.figure_folder_name = options.get("figure_folder_name", "fig")  # "" or "figs" or "main"
        self.bib_folder_name = options.get("bib_folder_name", "bib")  # "" or "bibs" or "main"
        self.md_folder_name = options.get("md_folder_name", "md")  # "" or "mds" or "main"
        self.tex_folder_name = options.get("tex_folder_name", "tex")  # "" or "texes" or "main"

        # Cleanup options
        self.delete_original_md_in_output_folder = options.get("delete_original_md_in_output_folder", False)
        self.delete_original_tex_in_output_folder = options.get("delete_original_tex_in_output_folder", False)
        self.delete_original_bib_in_output_folder = options.get("delete_original_bib_in_output_folder", False)

        # Initialize helper classes
        self._python_bib = PythonRunBib(self.options)
        self._python_md = PythonRunMd(self.options)
        self._python_tex = PythonRunTex(self.options)
        self._python_writer = PythonWriters(self.options)

    def run_files(
        self, file_list_md_tex: List[str], output_prefix: str = "", output_level: str = "next"
    ) -> Tuple[List[str], List[str]]:
        """Process a list of Markdown or LaTeX files.

        Only files whose name ends with ``self.tex_md_flag`` are read; their contents are
        combined and handed to :meth:`python_run_bib_md_tex` together with ``self.path_bibs``.

        Parameters
        ----------
        file_list_md_tex : List[str]
            List of input file paths (Markdown or LaTeX)
        output_prefix : str, optional
            Prefix for output files, by default "" (the base name of the first kept file is used)
        output_level : str, optional
            Output directory level ("previous", "current", or "next"), by default "next"

        Returns
        -------
        Tuple[List[str], List[str]]
            Tuple containing processed Markdown content and LaTeX content;
            two empty lists when no file is kept or every kept file is empty
        """
        file_list_md_tex = [f for f in file_list_md_tex if f.endswith(self.tex_md_flag)]
        data_list_list = [read_list(standard_path(f), "r") for f in file_list_md_tex]
        # `all` over an empty sequence is True, so an empty file list also returns here
        # and the `[0]` access below is safe.
        if all(len(data_list) == 0 for data_list in data_list_list):
            return [], []

        file_base_name = os.path.splitext(os.path.basename(file_list_md_tex[0]))[0]
        output_prefix = output_prefix if output_prefix else file_base_name

        data_list_md_tex = combine_content_in_list(data_list_list, ["\n"])

        content_md, content_tex = self.python_run_bib_md_tex(
            output_prefix, data_list_md_tex, self.path_bibs, output_level
        )
        return content_md, content_tex

    def python_run_bib_md_tex(
        self,
        output_prefix: str,
        data_list_md_tex: List[str],
        original_bib_data: Union[List[str], str, Library],
        output_level: str = "next",
    ) -> Tuple[List[str], List[str]]:
        """Process BibTeX, Markdown and LaTeX content.

        Parameters
        ----------
        output_prefix : str
            Prefix for output files
        data_list_md_tex : List[str]
            List of content lines (Markdown or LaTeX). When empty, a Markdown list citing
            every entry of ``original_bib_data`` is generated instead.
        original_bib_data : Union[List[str], str, Library]
            BibTeX data in various formats
        output_level : str, optional
            Output directory level ("previous", "current", or "next"), by default "next".
            Any other value is treated like "current".

        Returns
        -------
        Tuple[List[str], List[str]]
            Tuple containing processed Markdown content and LaTeX content

        Notes
        -----
        ``self.path_output`` is overwritten with the resolved output directory, so with
        "next" or "previous" each call starts from the directory chosen by the previous call.
        """
        # Basic file names
        output_tex, output_md = output_prefix + ".tex", output_prefix + ".md"

        if len(data_list_md_tex) == 0:
            original_bib_data = self._python_bib.parse_to_single_standard_library(original_bib_data)
            if not original_bib_data.entries:
                return [], []

            # No content given: build one citation bullet per bibliography entry,
            # headed by the prefix and the number of entries.
            data_list_md_tex = [f"- [@{entry.key}]\n\n" for entry in original_bib_data.entries]
            data_list_md_tex.insert(0, f"## {output_prefix} - {len(data_list_md_tex)}\n\n")

        # Determine output path based on level
        if output_level == "previous":
            path_output = os.path.dirname(self.path_output)
        elif output_level == "next":
            path_output = os.path.join(self.path_output, output_prefix)
        else:
            # "current" and any unrecognised value keep the configured directory.
            path_output = self.path_output

        os.makedirs(path_output, exist_ok=True)
        self.path_output = standard_path(path_output)

        return self._python_run_bib_md_tex(output_md, output_tex, data_list_md_tex, original_bib_data)

    def _python_run_bib_md_tex(
        self,
        output_md: str,
        output_tex: str,
        data_list_md_tex: List[str],
        original_bib_data: Union[List[str], str, Library],
    ) -> Tuple[List[str], List[str]]:
        """Process BibTeX, Markdown and LaTeX content.

        Parameters
        ----------
        output_md : str
            Output Markdown filename
        output_tex : str
            Output LaTeX filename
        data_list_md_tex : List[str]
            List of content lines (Markdown or LaTeX)
        original_bib_data : Union[List[str], str, Library]
            BibTeX data in various formats

        Returns
        -------
        Tuple[List[str], List[str]]
            Tuple containing processed Markdown content and LaTeX content
        """
        # Copy figures if enabled
        if self.shutil_figures:
            figure_names = self.search_figure_names(data_list_md_tex)
            self.shutil_copy_figures(self.figure_folder_name, self.path_figures, figure_names, self.path_output)

        # Extract citation keys from content
        key_in_md_tex = self.search_cite_keys(data_list_md_tex, self.tex_md_flag)

        # Process bibliography; the paths stay empty strings when nothing is cited
        full_bib_for_zotero, full_bib_for_abbr, full_bib_for_save = "", "", ""
        if key_in_md_tex:
            # Generate bib contents
            abbr_library, zotero_library, save_library = self._python_bib.parse_to_multi_standard_library(
                original_bib_data, key_in_md_tex
            )
            # Keep only the keys present in the library, in order of first citation.
            key_in_md_tex = sorted(abbr_library.entries_dict.keys(), key=key_in_md_tex.index)

            # Write bibliography files
            _path_output = os.path.join(self.path_output, self.bib_folder_name)
            full_bib_for_abbr, full_bib_for_zotero, full_bib_for_save = self._python_writer.write_multi_library_to_file(
                _path_output, abbr_library, zotero_library, save_library, key_in_md_tex
            )

        # Process content based on format
        if self.tex_md_flag == ".md":
            # Write original markdown content
            write_list(data_list_md_tex, output_md, "w", os.path.join(self.path_output, self.md_folder_name), False)

            # Generate processed content and write to given files
            data_list_md, data_list_tex = self._python_md.special_operate_for_md(
                self.path_output,
                data_list_md_tex,
                output_md,
                full_bib_for_abbr,
                full_bib_for_zotero,
                self.template_name,
                self.generate_html,
                self.generate_tex,
            )
        else:
            # LaTeX input is passed through unchanged; there is no Markdown counterpart.
            data_list_md, data_list_tex = [], data_list_md_tex

        # Generate LaTeX output if enabled
        if self.generate_tex:
            self._python_tex.generate_standard_tex_data_list(
                data_list_tex,
                output_tex,
                self.path_output,
                self.figure_folder_name,
                self.tex_folder_name,
                self.bib_folder_name,
                os.path.basename(full_bib_for_abbr),
                self.template_name,
            )

        # Cleanup original files if enabled
        if self.delete_original_md_in_output_folder:
            self._cleanup_file(os.path.join(self.path_output, self.md_folder_name, output_md))

        if self.delete_original_tex_in_output_folder:
            self._cleanup_file(os.path.join(self.path_output, self.tex_folder_name, output_tex))

        if self.delete_original_bib_in_output_folder:
            for file in [full_bib_for_abbr, full_bib_for_zotero, full_bib_for_save]:
                self._cleanup_file(file)

        return data_list_md, data_list_tex

    @staticmethod
    def search_figure_names(data_list: List[str], figure_postfixes: Optional[List[str]] = None) -> List[str]:
        """Search for figure filenames in content.

        Parameters
        ----------
        data_list : List[str]
            List of content lines to search
        figure_postfixes : Optional[List[str]], optional
            List of figure file extensions to look for (without the dot), by default None,
            which selects eps, jpg, png, svg, psd, raw, jpeg and pdf. The items are inserted
            into the regular expression as-is and matched case-insensitively.

        Returns
        -------
        List[str]
            List of found figure filenames, de-duplicated, in order of first appearance
        """
        if figure_postfixes is None:
            figure_postfixes = ["eps", "jpg", "png", "svg", "psd", "raw", "jpeg", "pdf"]

        alternation = "|".join(figure_postfixes)
        regex = re.compile(rf"[\w\-]+\.(?:{alternation})", re.I)
        figure_names: List[str] = []
        for line in data_list:
            figure_names.extend(regex.findall(line))
        # dict keys keep insertion order, giving a linear-time order-preserving de-duplication.
        return list(dict.fromkeys(figure_names))

    @staticmethod
    def shutil_copy_figures(fig_folder_name: str, path_fig: str, fig_names: List[str], path_output: str) -> None:
        """Copy figure files to output directory.

        ``path_fig`` is walked recursively and every file whose name is in ``fig_names`` is
        copied, flattened by base name, into ``path_output/fig_folder_name``.

        Parameters
        ----------
        fig_folder_name : str
            Name of figures folder in output directory
        path_fig : str
            Source directory containing figures
        fig_names : List[str]
            List of figure filenames to copy
        path_output : str
            Output directory path

        Returns
        -------
        None
        """
        if not os.path.exists(path_fig):
            print(f"{path_fig} does not existed.")
            return None

        wanted = set(fig_names)  # constant-time membership inside the walk
        file_list = []
        for root, _, files in os.walk(path_fig, topdown=False):
            for name in files:
                if name in wanted:
                    file_list.append(os.path.join(root, name))

        for file in file_list:
            path_output_file = os.path.join(path_output, fig_folder_name, os.path.basename(file))
            os.makedirs(os.path.dirname(path_output_file), exist_ok=True)
            shutil.copy(file, path_output_file)
        return None

    @staticmethod
    def search_cite_keys(data_list: List[str], tex_md_flag: str = ".tex") -> List[str]:
        r"""Extract citation keys from content according to their places.

        Parameters
        ----------
        data_list : List[str]
            List of content lines to search
        tex_md_flag : str, optional
            Flag indicating content format (".tex" or ".md"), by default ".tex".
            Any other value prints a message and yields an empty list.

        Returns
        -------
        List[str]
            List of found citation keys, de-duplicated, in order of first match

        Notes
        -----
        For LaTeX, searches for \\cite, \\citep, \\citet patterns
        For Markdown, searches for [@key], @key; and ;@key] patterns
        """
        cite_key_list = []
        full_text = "".join(data_list)
        if tex_md_flag == ".tex":
            regex = re.compile(r"\\[a-z]*cite[tp]*{\s*([\w\-.,:/\s]*)\s*}")
            # One cite command may hold several comma-separated keys.
            cite_key_list = combine_content_in_list([group.split(",") for group in regex.findall(full_text)])
        elif tex_md_flag == ".md":
            regex_list = [
                re.compile(r"\[@([\w\-.:/]+)\]"),
                re.compile(r"@([\w\-.:/]+)\s*;"),
                re.compile(r";\s*@([\w\-.:/]*)\s*]"),
            ]
            cite_key_list = combine_content_in_list([regex.findall(full_text) for regex in regex_list])
        else:
            print(f"{tex_md_flag} must be `.tex` or `.md`.")

        cite_key_list = [c.strip() for c in cite_key_list if c.strip()]
        # dict keys keep insertion order, giving a linear-time order-preserving de-duplication.
        return list(dict.fromkeys(cite_key_list))

    def _cleanup_file(self, file_path: str) -> None:
        """Cleanup files and empty directories.

        Removes ``file_path`` if it exists, then removes its parent directory when that
        directory is not ``self.path_output`` and contains nothing but ``.DS_Store``.

        Parameters
        ----------
        file_path : str
            Path to file to be removed

        Returns
        -------
        None
        """
        if os.path.exists(file_path):
            os.remove(file_path)

        dir_path = os.path.dirname(file_path)
        # `file_path` can be "" (no bibliography was written) or point into a folder that
        # was never created; `os.listdir` would raise FileNotFoundError for those.
        if not dir_path or not os.path.isdir(dir_path):
            return None

        if dir_path != self.path_output:  # Don't remove the main output directory
            if all(name == ".DS_Store" for name in os.listdir(dir_path)):
                shutil.rmtree(dir_path)
        return None
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import re
|
|
3
|
+
from typing import Any, Dict, List, Union
|
|
4
|
+
|
|
5
|
+
from pyadvtools import standard_path, transform_to_data_list, write_list
|
|
6
|
+
|
|
7
|
+
from ...bib.bibtexparser import Library
|
|
8
|
+
from ...main import PythonRunBib, PythonWriters
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def replace_to_standard_cite_keys(
    full_tex_md: str, full_bib: str, path_output: str, options: Dict[str, Any]
) -> List[str]:
    """Rewrite the cite keys of a LaTeX/Markdown document to regenerated keys.

    The bibliography is passed through ``generate_old_key_new_entry_dict`` to obtain an
    old-key -> new-entry mapping; every old key found in a citation of the document is then
    replaced by the key of its new entry. The rewritten document is written as
    ``new.tex``/``new.md`` and the new entries as ``new.bib`` into ``path_output``.

    Parameters
    ----------
    full_tex_md : str
        Document handed to ``transform_to_data_list``; its extension must be ``.tex`` or
        ``.md`` (presumably a file path -- TODO confirm)
    full_bib : str
        Bibliography handed to ``transform_to_data_list`` with the ``.bib`` flag
    path_output : str
        Directory the ``new.*`` files are written to
    options : Dict[str, Any]
        Options forwarded to the bibliography helpers; ``is_sort_blocks`` is forced to
        False on a copy for the writer

    Returns
    -------
    List[str]
        Lines of the rewritten document, or an empty list when the extension is unsupported

    Notes
    -----
    Replacements are applied one old key after another on the same text, so a new key that
    equals a later old key would be rewritten again. The ``,key,`` / ``,key}`` / ``,key]``
    patterns are not tied to a cite command and match anywhere in the text.
    """
    ext = os.path.splitext(full_tex_md)[-1]
    if ext not in [".tex", ".md", "md", "tex"]:
        print(f"{full_tex_md} must be `.tex` or `.md` file.")
        return []

    path_output = standard_path(path_output)

    bib_data = transform_to_data_list(full_bib, ".bib")
    old_key_new_entry_dict = generate_old_key_new_entry_dict(bib_data, options)

    data = "".join(transform_to_data_list(full_tex_md, ext))
    for old_key, new_entry in old_key_new_entry_dict.items():
        # Keys may contain regex metacharacters (".", "+", ...): match them literally.
        old = re.escape(old_key)
        # Backslash is the only special character in a replacement template.
        new = new_entry.key.replace("\\", "\\\\")
        if ext == ".tex":
            data = re.sub(r"\\cite([a-z]*)\{\s*" + old + r"\s*\}", r"\\cite\1{" + new + "}", data)
            data = re.sub(r"\\cite([a-z]*)\{\s*" + old + r"\s*,", r"\\cite\1{" + new + ",", data)
            data = re.sub(r",\s*" + old + r"\s*,", "," + new + ",", data)
            data = re.sub(r",\s*" + old + r"\s*\}", "," + new + "}", data)
        elif ext == ".md":
            data = re.sub(r"\[@\s*" + old + r"\s*\]", "[@" + new + "]", data)
            data = re.sub(r"\[@\s*" + old + r"\s*,", "[@" + new + ",", data)
            data = re.sub(r",\s*" + old + r"\s*,", "," + new + ",", data)
            data = re.sub(r",\s*" + old + r"\s*\]", "," + new + "]", data)
    data_list = data.splitlines(keepends=True)
    write_list(data_list, f"new{ext}", "w", path_output, False)

    # Write the entries in mapping order: work on a copy so the caller's options stay intact.
    _options = {}
    _options.update(options)
    _options["is_sort_blocks"] = False  # default is True
    _python_write = PythonWriters(_options)
    _python_write.write_to_file(list(old_key_new_entry_dict.values()), "new.bib", "w", path_output, False)
    return data_list
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def generate_old_key_new_entry_dict(bib_data: Union[List[str], str], options: Dict[str, Any]) -> dict:
    """Map each original cite key to an entry carrying a regenerated cite key.

    The bibliography is parsed once with ``generate_entry_cite_keys`` disabled to obtain
    the original keys. Each entry is then re-parsed on its own with the option enabled.
    When that yields exactly one entry, its key is made unique among the keys produced so
    far by appending ``"-a"`` as often as needed; otherwise the original entry is kept.

    Parameters
    ----------
    bib_data : Union[List[str], str]
        Bibliography data handed to ``PythonRunBib.parse_to_single_standard_library``
    options : Dict[str, Any]
        Base options; copied for each parser, never modified

    Returns
    -------
    dict
        Original cite key -> entry (re-keyed entry, or the original entry as a fallback)
    """
    # Pass 1: parse the whole bibliography without generating new keys.
    parser_keep_keys = PythonRunBib({**options, "generate_entry_cite_keys": False})  # default is False
    library = parser_keep_keys.parse_to_single_standard_library(bib_data)

    # Pass 2 parser: generates a new key for whatever it is given.
    parser_new_keys = PythonRunBib({**options, "generate_entry_cite_keys": True})  # default is False

    result = {}
    used_keys: List[str] = []
    for old_key, old_entry in library.entries_dict.items():
        regenerated = parser_new_keys.parse_to_single_standard_library(Library([old_entry]))
        if len(regenerated.entries) != 1:
            # Re-parsing did not give a single entry: fall back to the original one.
            result[old_key] = old_entry
            continue

        fresh_entry = regenerated.entries[0]

        # Disambiguate against the keys already handed out.
        candidate = fresh_entry.key
        while candidate in used_keys:
            candidate += "-a"
        fresh_entry.key = candidate

        used_keys.append(fresh_entry.key)
        result[old_key] = fresh_entry
    return result
|