pyeasyphd 0.4.12__py3-none-any.whl → 0.4.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/main/basic_input.py +17 -17
- pyeasyphd/main/pandoc_md_to.py +30 -31
- pyeasyphd/main/python_run_md.py +30 -31
- pyeasyphd/main/python_run_tex.py +17 -17
- pyeasyphd/pyeasyphd.py +1 -1
- pyeasyphd/scripts/_base.py +15 -17
- pyeasyphd/scripts/run_article_md.py +12 -11
- pyeasyphd/scripts/run_article_tex.py +12 -12
- pyeasyphd/scripts/run_beamer_tex.py +12 -12
- pyeasyphd/scripts/run_compare.py +2 -4
- pyeasyphd/scripts/run_format.py +2 -4
- pyeasyphd/scripts/run_replace.py +3 -6
- pyeasyphd/scripts/run_search.py +10 -16
- pyeasyphd/tools/generate/generate_from_bibs.py +23 -17
- pyeasyphd/tools/generate/generate_html.py +20 -16
- pyeasyphd/tools/generate/generate_library.py +27 -24
- pyeasyphd/tools/generate/generate_links.py +30 -31
- pyeasyphd/tools/py_run_bib_md_tex.py +50 -31
- pyeasyphd/tools/search/data.py +3 -3
- pyeasyphd/tools/search/search_base.py +12 -13
- pyeasyphd/tools/search/search_core.py +32 -29
- pyeasyphd/tools/search/search_keywords.py +6 -6
- pyeasyphd/tools/search/search_writers.py +29 -30
- pyeasyphd/tools/search/utils.py +9 -10
- pyeasyphd/utils/utils.py +1 -2
- {pyeasyphd-0.4.12.dist-info → pyeasyphd-0.4.14.dist-info}/METADATA +2 -2
- pyeasyphd-0.4.14.dist-info/RECORD +53 -0
- pyeasyphd-0.4.12.dist-info/RECORD +0 -53
- {pyeasyphd-0.4.12.dist-info → pyeasyphd-0.4.14.dist-info}/WHEEL +0 -0
- {pyeasyphd-0.4.12.dist-info → pyeasyphd-0.4.14.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,12 @@
|
|
|
1
|
-
# coding=utf-8
|
|
2
|
-
|
|
3
1
|
import json
|
|
4
2
|
import os
|
|
3
|
+
import subprocess
|
|
5
4
|
from datetime import datetime
|
|
6
|
-
from typing import Dict, List
|
|
7
5
|
|
|
8
6
|
from pyadvtools import standard_path
|
|
9
7
|
|
|
10
8
|
|
|
11
|
-
class PaperLinksGenerator
|
|
9
|
+
class PaperLinksGenerator:
|
|
12
10
|
"""Generate markdown files with paper links from JSON data."""
|
|
13
11
|
|
|
14
12
|
def __init__(
|
|
@@ -46,11 +44,11 @@ class PaperLinksGenerator(object):
|
|
|
46
44
|
keywords_list, keywords_category_name = [], ""
|
|
47
45
|
|
|
48
46
|
self.keywords_category_name = keywords_category_name
|
|
49
|
-
self.keywords_list:
|
|
47
|
+
self.keywords_list: list[str] = keywords_list
|
|
50
48
|
|
|
51
49
|
self.display_year_period = display_year_period
|
|
52
50
|
|
|
53
|
-
def generate_yearly_links(self, cj: str, folder_name=os.path.join("data", "Yearly")) -> None:
|
|
51
|
+
def generate_yearly_links(self, cj: str, folder_name: str = os.path.join("data", "Yearly")) -> None:
|
|
54
52
|
"""Generate yearly markdown table with paper links.
|
|
55
53
|
|
|
56
54
|
Args:
|
|
@@ -62,7 +60,7 @@ class PaperLinksGenerator(object):
|
|
|
62
60
|
|
|
63
61
|
self._generate_links(cj, flags, folder_flags, folder_name)
|
|
64
62
|
|
|
65
|
-
def generate_monthly_links(self, folder_name=os.path.join("data", "Monthly")) -> None:
|
|
63
|
+
def generate_monthly_links(self, folder_name: str = os.path.join("data", "Monthly")) -> None:
|
|
66
64
|
"""Generate monthly markdown table with journal paper links.
|
|
67
65
|
|
|
68
66
|
Args:
|
|
@@ -75,7 +73,7 @@ class PaperLinksGenerator(object):
|
|
|
75
73
|
|
|
76
74
|
self._generate_links(cj, flags, folder_flags, folder_name)
|
|
77
75
|
|
|
78
|
-
def generate_weekly_links(self, folder_name=os.path.join("data", "Weekly")) -> None:
|
|
76
|
+
def generate_weekly_links(self, folder_name: str = os.path.join("data", "Weekly")) -> None:
|
|
79
77
|
"""Generate weekly markdown table with journal paper links.
|
|
80
78
|
|
|
81
79
|
Args:
|
|
@@ -117,12 +115,13 @@ class PaperLinksGenerator(object):
|
|
|
117
115
|
file_html = os.path.join(base_path, f"{file_name}.html")
|
|
118
116
|
|
|
119
117
|
try:
|
|
120
|
-
|
|
118
|
+
cmd = f"pandoc {file_md} -o {file_html}"
|
|
119
|
+
subprocess.run(cmd.split(), check=True, capture_output=True, text=True)
|
|
121
120
|
os.remove(file_md)
|
|
122
|
-
except
|
|
123
|
-
print(
|
|
121
|
+
except subprocess.CalledProcessError as e:
|
|
122
|
+
print("Pandoc error in pandoc md to html:", e.stderr)
|
|
124
123
|
|
|
125
|
-
def generate_ieee_early_access_links(self, folder_name=os.path.join("data", "Weekly")) -> None:
|
|
124
|
+
def generate_ieee_early_access_links(self, folder_name: str = os.path.join("data", "Weekly")) -> None:
|
|
126
125
|
"""Generate markdown for IEEE Early Access papers."""
|
|
127
126
|
md_content = [
|
|
128
127
|
"# Papers from Early Access\n\n",
|
|
@@ -137,7 +136,7 @@ class PaperLinksGenerator(object):
|
|
|
137
136
|
|
|
138
137
|
return None
|
|
139
138
|
|
|
140
|
-
def _load_json_data(self, file_name: str) ->
|
|
139
|
+
def _load_json_data(self, file_name: str) -> dict:
|
|
141
140
|
"""Load JSON data from file."""
|
|
142
141
|
try:
|
|
143
142
|
if file_name.lower().strip() == "conferences":
|
|
@@ -152,25 +151,25 @@ class PaperLinksGenerator(object):
|
|
|
152
151
|
if not os.path.exists(file_path):
|
|
153
152
|
return {}
|
|
154
153
|
|
|
155
|
-
with open(file_path,
|
|
154
|
+
with open(file_path, encoding="utf-8") as file:
|
|
156
155
|
return json.load(file)
|
|
157
156
|
|
|
158
157
|
except Exception as e:
|
|
159
158
|
print(f"Error loading {file_name}.json: {e}")
|
|
160
159
|
return {}
|
|
161
160
|
|
|
162
|
-
def _get_yearly_flags(self, cj: str) ->
|
|
161
|
+
def _get_yearly_flags(self, cj: str) -> list[str]:
|
|
163
162
|
"""Get yearly flags based on publication type."""
|
|
164
163
|
current_year = datetime.now().year
|
|
165
164
|
years = [str(y) for y in range(current_year - self.display_year_period, current_year)]
|
|
166
165
|
flags = sorted(years, reverse=True)
|
|
167
166
|
|
|
168
167
|
if cj.lower() == "conferences":
|
|
169
|
-
flags = [str(current_year)
|
|
168
|
+
flags = [str(current_year), *flags]
|
|
170
169
|
|
|
171
170
|
return flags
|
|
172
171
|
|
|
173
|
-
def _create_md_header_publisher(self, cj: str, flags:
|
|
172
|
+
def _create_md_header_publisher(self, cj: str, flags: list[str]) -> list[str]:
|
|
174
173
|
"""Create markdown table header."""
|
|
175
174
|
return [
|
|
176
175
|
f"# Papers from {cj.title()} of Different Publishers\n\n",
|
|
@@ -178,7 +177,7 @@ class PaperLinksGenerator(object):
|
|
|
178
177
|
f"|-|{'|'.join('-' for _ in flags)}|\n",
|
|
179
178
|
]
|
|
180
179
|
|
|
181
|
-
def _create_md_header_abbr(self, cj: str, flags:
|
|
180
|
+
def _create_md_header_abbr(self, cj: str, flags: list[str]) -> list[str]:
|
|
182
181
|
"""Create markdown table header."""
|
|
183
182
|
return [
|
|
184
183
|
f"# Papers from {cj.title()} of Different Publishers\n\n",
|
|
@@ -188,8 +187,8 @@ class PaperLinksGenerator(object):
|
|
|
188
187
|
|
|
189
188
|
# publisher
|
|
190
189
|
def _generate_table_rows_publisher(
|
|
191
|
-
self, json_data:
|
|
192
|
-
) ->
|
|
190
|
+
self, json_data: dict, cj: str, folder_flags: list[str], period: str
|
|
191
|
+
) -> list[str]:
|
|
193
192
|
"""Generate markdown table rows."""
|
|
194
193
|
rows = []
|
|
195
194
|
idx = 1
|
|
@@ -202,7 +201,7 @@ class PaperLinksGenerator(object):
|
|
|
202
201
|
|
|
203
202
|
return rows
|
|
204
203
|
|
|
205
|
-
def _get_link_cells_publisher(self, publisher: str, cj: str, folder_flags:
|
|
204
|
+
def _get_link_cells_publisher(self, publisher: str, cj: str, folder_flags: list[str], period: str) -> list[str]:
|
|
206
205
|
"""Get link cells for a publisher."""
|
|
207
206
|
cells = []
|
|
208
207
|
|
|
@@ -218,7 +217,7 @@ class PaperLinksGenerator(object):
|
|
|
218
217
|
return cells
|
|
219
218
|
|
|
220
219
|
# abbr
|
|
221
|
-
def _generate_table_rows_abbr(self, json_data:
|
|
220
|
+
def _generate_table_rows_abbr(self, json_data: dict, cj: str, folder_flags: list[str], period: str) -> list[str]:
|
|
222
221
|
"""Generate markdown table rows."""
|
|
223
222
|
rows = []
|
|
224
223
|
idx = 1
|
|
@@ -236,8 +235,8 @@ class PaperLinksGenerator(object):
|
|
|
236
235
|
return rows
|
|
237
236
|
|
|
238
237
|
def _get_link_cells_abbr(
|
|
239
|
-
self, publisher: str, abbr: str, cj: str, folder_flags:
|
|
240
|
-
) ->
|
|
238
|
+
self, publisher: str, abbr: str, cj: str, folder_flags: list[str], period: str
|
|
239
|
+
) -> list[str]:
|
|
241
240
|
"""Get link cells for a abbr."""
|
|
242
241
|
cells = []
|
|
243
242
|
for flag in folder_flags:
|
|
@@ -250,7 +249,7 @@ class PaperLinksGenerator(object):
|
|
|
250
249
|
|
|
251
250
|
return cells
|
|
252
251
|
|
|
253
|
-
def _get_ieee_links(self, folder_name=os.path.join("data", "Weekly")) ->
|
|
252
|
+
def _get_ieee_links(self, folder_name: str = os.path.join("data", "Weekly")) -> list[str]:
|
|
254
253
|
"""Get IEEE Early Access links."""
|
|
255
254
|
links = []
|
|
256
255
|
link_paths = [
|
|
@@ -267,7 +266,7 @@ class PaperLinksGenerator(object):
|
|
|
267
266
|
|
|
268
267
|
return links
|
|
269
268
|
|
|
270
|
-
def _write_md_file(self, content:
|
|
269
|
+
def _write_md_file(self, content: list[str], period: str, file_name: str) -> None:
|
|
271
270
|
"""Write markdown content to file."""
|
|
272
271
|
if len(content) == 0:
|
|
273
272
|
return None
|
|
@@ -282,25 +281,25 @@ class PaperLinksGenerator(object):
|
|
|
282
281
|
|
|
283
282
|
return None
|
|
284
283
|
|
|
285
|
-
def generate_keywords_links_weekly(self, cj: str, folder_name=os.path.join("data", "Weekly")):
|
|
284
|
+
def generate_keywords_links_weekly(self, cj: str, folder_name: str = os.path.join("data", "Weekly")):
|
|
286
285
|
flags = ["Current Issue", "Current Month"]
|
|
287
286
|
folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
|
|
288
287
|
|
|
289
288
|
self._generate_keywords_links(cj, folder_name, flags, folder_flags)
|
|
290
289
|
|
|
291
|
-
def generate_keywords_links_monthly(self, cj: str, folder_name=os.path.join("data", "Monthly")):
|
|
290
|
+
def generate_keywords_links_monthly(self, cj: str, folder_name: str = os.path.join("data", "Monthly")):
|
|
292
291
|
flags = ["All Months"]
|
|
293
292
|
folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
|
|
294
293
|
|
|
295
294
|
self._generate_keywords_links(cj, folder_name, flags, folder_flags)
|
|
296
295
|
|
|
297
|
-
def generate_keywords_links_yearly(self, cj: str, folder_name=os.path.join("data", "Yearly")):
|
|
296
|
+
def generate_keywords_links_yearly(self, cj: str, folder_name: str = os.path.join("data", "Yearly")):
|
|
298
297
|
flags = self._get_yearly_flags(cj)
|
|
299
298
|
folder_flags = [f"{f}_all_months" for f in flags]
|
|
300
299
|
|
|
301
300
|
self._generate_keywords_links(cj, folder_name, flags, folder_flags)
|
|
302
301
|
|
|
303
|
-
def _generate_keywords_links(self, cj: str, folder_name: str, flags:
|
|
302
|
+
def _generate_keywords_links(self, cj: str, folder_name: str, flags: list[str], folder_flags: list[str]):
|
|
304
303
|
json_data = self._load_json_data(cj.title())
|
|
305
304
|
if not json_data:
|
|
306
305
|
return None
|
|
@@ -385,7 +384,7 @@ class PaperLinksGenerator(object):
|
|
|
385
384
|
cleaned_keywords = {}
|
|
386
385
|
for category, words in keywords_dict.items():
|
|
387
386
|
if category.strip():
|
|
388
|
-
sorted_words = sorted(
|
|
387
|
+
sorted_words = sorted({word.strip() for word in words if word.strip()})
|
|
389
388
|
cleaned_keywords[category.strip()] = sorted_words
|
|
390
389
|
|
|
391
390
|
# For category
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import re
|
|
3
3
|
import shutil
|
|
4
|
-
from typing import Any
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
from pyadvtools import combine_content_in_list, read_list, standard_path, write_list
|
|
7
7
|
from pybibtexer.bib.bibtexparser import Library
|
|
@@ -18,7 +18,11 @@ class PyRunBibMdTex(BasicInput):
|
|
|
18
18
|
"""
|
|
19
19
|
|
|
20
20
|
def __init__(
|
|
21
|
-
self,
|
|
21
|
+
self,
|
|
22
|
+
path_output: str,
|
|
23
|
+
tex_md_flag: str = ".md",
|
|
24
|
+
template_name: str = "paper",
|
|
25
|
+
options: dict[str, Any] | None = None,
|
|
22
26
|
) -> None:
|
|
23
27
|
"""Initialize the PyRunBibMdTex instance.
|
|
24
28
|
|
|
@@ -26,11 +30,14 @@ class PyRunBibMdTex(BasicInput):
|
|
|
26
30
|
path_output (str): Output directory path for processed files.
|
|
27
31
|
tex_md_flag (str, optional): Flag indicating whether to process as LaTeX (".tex") or Markdown (".md"). Defaults to ".md".
|
|
28
32
|
template_name (str, optional): Template type to use ("paper" or "beamer"). Defaults to "paper".
|
|
29
|
-
options (
|
|
33
|
+
options (dict[str, Any], optional): Additional configuration options. Defaults to {}.
|
|
30
34
|
|
|
31
35
|
Raises:
|
|
32
36
|
AssertionError: If tex_md_flag is not ".tex" or ".md" or if template_name is not "paper" or "beamer".
|
|
33
37
|
"""
|
|
38
|
+
if options is None:
|
|
39
|
+
options = {}
|
|
40
|
+
|
|
34
41
|
super().__init__(options)
|
|
35
42
|
|
|
36
43
|
self.tex_md_flag = re.sub(r"\.+", ".", "." + tex_md_flag)
|
|
@@ -85,21 +92,21 @@ class PyRunBibMdTex(BasicInput):
|
|
|
85
92
|
self._python_tex = PythonRunTex(self.options)
|
|
86
93
|
|
|
87
94
|
def run_files(
|
|
88
|
-
self, file_list_md_tex:
|
|
89
|
-
) ->
|
|
95
|
+
self, file_list_md_tex: list[str], output_prefix: str = "", output_level: str = "next"
|
|
96
|
+
) -> tuple[list[str], list[str]]:
|
|
90
97
|
"""Process a list of Markdown or LaTeX files.
|
|
91
98
|
|
|
92
99
|
Args:
|
|
93
|
-
file_list_md_tex (
|
|
100
|
+
file_list_md_tex (list[str]): list of input file paths (Markdown or LaTeX).
|
|
94
101
|
output_prefix (str, optional): Prefix for output files. Defaults to "".
|
|
95
102
|
output_level (str, optional): Output directory level ("previous", "current", or "next"). Defaults to "next".
|
|
96
103
|
|
|
97
104
|
Returns:
|
|
98
|
-
|
|
105
|
+
tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
|
|
99
106
|
"""
|
|
100
107
|
file_list_md_tex = [f for f in file_list_md_tex if f.endswith(self.tex_md_flag)]
|
|
101
108
|
data_list_list = [read_list(standard_path(f), "r") for f in file_list_md_tex]
|
|
102
|
-
if all(
|
|
109
|
+
if all(len(data_list) == 0 for data_list in data_list_list):
|
|
103
110
|
return [], []
|
|
104
111
|
|
|
105
112
|
file_base_name = os.path.splitext(os.path.basename(file_list_md_tex[0]))[0]
|
|
@@ -115,20 +122,20 @@ class PyRunBibMdTex(BasicInput):
|
|
|
115
122
|
def python_run_bib_md_tex(
|
|
116
123
|
self,
|
|
117
124
|
output_prefix: str,
|
|
118
|
-
data_list_md_tex:
|
|
119
|
-
original_bib_data:
|
|
125
|
+
data_list_md_tex: list[str],
|
|
126
|
+
original_bib_data: list[str] | str | Library,
|
|
120
127
|
output_level: str = "next",
|
|
121
|
-
) ->
|
|
128
|
+
) -> tuple[list[str], list[str]]:
|
|
122
129
|
"""Process BibTeX, Markdown and LaTeX content.
|
|
123
130
|
|
|
124
131
|
Args:
|
|
125
132
|
output_prefix (str): Prefix for output files.
|
|
126
|
-
data_list_md_tex (
|
|
127
|
-
original_bib_data (
|
|
133
|
+
data_list_md_tex (list[str]): list of content lines (Markdown or LaTeX).
|
|
134
|
+
original_bib_data (list[str] | str | Library): BibTeX data in various formats.
|
|
128
135
|
output_level (str, optional): Output directory level ("previous", "current", or "next"). Defaults to "next".
|
|
129
136
|
|
|
130
137
|
Returns:
|
|
131
|
-
|
|
138
|
+
tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
|
|
132
139
|
"""
|
|
133
140
|
# Basic file names
|
|
134
141
|
output_tex, output_md = output_prefix + ".tex", output_prefix + ".md"
|
|
@@ -163,19 +170,19 @@ class PyRunBibMdTex(BasicInput):
|
|
|
163
170
|
self,
|
|
164
171
|
output_md: str,
|
|
165
172
|
output_tex: str,
|
|
166
|
-
data_list_md_tex:
|
|
167
|
-
original_bib_data:
|
|
168
|
-
) ->
|
|
173
|
+
data_list_md_tex: list[str],
|
|
174
|
+
original_bib_data: list[str] | str | Library,
|
|
175
|
+
) -> tuple[list[str], list[str]]:
|
|
169
176
|
"""Process BibTeX, Markdown and LaTeX content.
|
|
170
177
|
|
|
171
178
|
Args:
|
|
172
179
|
output_md (str): Output Markdown filename.
|
|
173
180
|
output_tex (str): Output LaTeX filename.
|
|
174
|
-
data_list_md_tex (
|
|
175
|
-
original_bib_data (
|
|
181
|
+
data_list_md_tex (list[str]): list of content lines (Markdown or LaTeX).
|
|
182
|
+
original_bib_data (list[str] | str | Library): BibTeX data in various formats.
|
|
176
183
|
|
|
177
184
|
Returns:
|
|
178
|
-
|
|
185
|
+
tuple[list[str], list[str]]: Tuple containing processed Markdown content and LaTeX content.
|
|
179
186
|
"""
|
|
180
187
|
# Copy figures if enabled
|
|
181
188
|
if self.shutil_includegraphics_figs:
|
|
@@ -209,7 +216,13 @@ class PyRunBibMdTex(BasicInput):
|
|
|
209
216
|
abbr_library, zotero_library, save_library = self._python_bib.parse_to_multi_standard_library(
|
|
210
217
|
original_bib_data, key_in_md_tex
|
|
211
218
|
)
|
|
212
|
-
|
|
219
|
+
|
|
220
|
+
# Update entries
|
|
221
|
+
abbr_library = self._update_library_by_entry_keys(abbr_library, key_in_md_tex)
|
|
222
|
+
zotero_library = self._update_library_by_entry_keys(zotero_library, key_in_md_tex)
|
|
223
|
+
save_library = self._update_library_by_entry_keys(save_library, key_in_md_tex)
|
|
224
|
+
|
|
225
|
+
key_in_md_tex = sorted(abbr_library.entries_dict.keys(), key=key_in_md_tex.index)
|
|
213
226
|
|
|
214
227
|
# Write bibliography files
|
|
215
228
|
_path_output = os.path.join(self.path_output, self.bib_folder_name)
|
|
@@ -265,17 +278,23 @@ class PyRunBibMdTex(BasicInput):
|
|
|
265
278
|
return data_list_md, data_list_tex
|
|
266
279
|
|
|
267
280
|
@staticmethod
|
|
268
|
-
def
|
|
281
|
+
def _update_library_by_entry_keys(library: Library, keys: list[str]):
|
|
282
|
+
remove_entries = [entry for entry in library.entries if entry.key not in keys]
|
|
283
|
+
library.remove(remove_entries)
|
|
284
|
+
return library
|
|
285
|
+
|
|
286
|
+
@staticmethod
|
|
287
|
+
def search_subfile_names(data_list: list[str], postfixes: list[str]) -> list[str]:
|
|
269
288
|
"""Search for figure filenames in content.
|
|
270
289
|
|
|
271
290
|
Args:
|
|
272
|
-
data_list (
|
|
273
|
-
figure_postfixes (Optional[
|
|
291
|
+
data_list (list[str]): list of content lines to search.
|
|
292
|
+
figure_postfixes (Optional[list[str]], optional): list of figure file extensions to look for. Defaults to None.
|
|
274
293
|
|
|
275
294
|
Returns:
|
|
276
|
-
|
|
295
|
+
list[str]: list of found figure filenames.
|
|
277
296
|
"""
|
|
278
|
-
regex = re.compile(rf
|
|
297
|
+
regex = re.compile(rf"[\w\-]+\.(?:{'|'.join(postfixes)})", re.I)
|
|
279
298
|
figure_names = []
|
|
280
299
|
for line in data_list:
|
|
281
300
|
figure_names.extend(regex.findall(line))
|
|
@@ -283,7 +302,7 @@ class PyRunBibMdTex(BasicInput):
|
|
|
283
302
|
|
|
284
303
|
@staticmethod
|
|
285
304
|
def shutil_copy_files(
|
|
286
|
-
path_file: str, file_names:
|
|
305
|
+
path_file: str, file_names: list[str], path_output: str, output_folder_name: str, relative_path: bool
|
|
287
306
|
) -> None:
|
|
288
307
|
"""Copy specified files from source directory to output directory.
|
|
289
308
|
|
|
@@ -292,7 +311,7 @@ class PyRunBibMdTex(BasicInput):
|
|
|
292
311
|
|
|
293
312
|
Args:
|
|
294
313
|
path_file: Source directory path to search for files.
|
|
295
|
-
file_names:
|
|
314
|
+
file_names: list of filenames to copy.
|
|
296
315
|
path_output: Destination directory path.
|
|
297
316
|
output_folder_name: Name of the subfolder in output directory (used when relative_path=False).
|
|
298
317
|
relative_path: If True, preserves relative path structure; if False, uses flat structure.
|
|
@@ -341,15 +360,15 @@ class PyRunBibMdTex(BasicInput):
|
|
|
341
360
|
return None
|
|
342
361
|
|
|
343
362
|
@staticmethod
|
|
344
|
-
def search_cite_keys(data_list:
|
|
363
|
+
def search_cite_keys(data_list: list[str], tex_md_flag: str = ".tex") -> list[str]:
|
|
345
364
|
r"""Extract citation keys from content according to their places.
|
|
346
365
|
|
|
347
366
|
Args:
|
|
348
|
-
data_list (
|
|
367
|
+
data_list (list[str]): list of content lines to search.
|
|
349
368
|
tex_md_flag (str, optional): Flag indicating content format (".tex" or ".md"). Defaults to ".tex".
|
|
350
369
|
|
|
351
370
|
Returns:
|
|
352
|
-
|
|
371
|
+
list[str]: list of found citation keys.
|
|
353
372
|
|
|
354
373
|
Note:
|
|
355
374
|
For LaTeX, searches for \\cite, \\citep, \\citet patterns.
|
pyeasyphd/tools/search/data.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any
|
|
2
2
|
|
|
3
3
|
|
|
4
|
-
def obtain_search_keywords() ->
|
|
4
|
+
def obtain_search_keywords() -> dict[str, Any]:
|
|
5
5
|
"""Obtain search keywords dictionary.
|
|
6
6
|
|
|
7
7
|
Returns:
|
|
8
|
-
|
|
8
|
+
dict[str, Any]: dictionary containing categorized search keywords.
|
|
9
9
|
"""
|
|
10
10
|
_h_ = "(?:| |-)" # hyphen
|
|
11
11
|
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import copy
|
|
2
2
|
import re
|
|
3
|
-
from typing import Dict, List, Tuple
|
|
4
3
|
|
|
5
4
|
from pybibtexer.bib.bibtexparser import Library
|
|
6
5
|
from pybibtexer.main import PythonRunBib, PythonWriters
|
|
@@ -9,16 +8,16 @@ from ...main import BasicInput
|
|
|
9
8
|
from .search_writers import WriteInitialResult, WriteSeparateResult
|
|
10
9
|
|
|
11
10
|
|
|
12
|
-
def search_keywords_core(keywords_list_list:
|
|
11
|
+
def search_keywords_core(keywords_list_list: list[list[str]], library: Library, field: str) -> tuple[Library, Library]:
|
|
13
12
|
"""Search keywords in specified field such as title, abstract, or keywords.
|
|
14
13
|
|
|
15
14
|
Args:
|
|
16
|
-
keywords_list_list (
|
|
15
|
+
keywords_list_list (list[list[str]]): list of keyword lists to search for.
|
|
17
16
|
library (Library): Bibliography library to search.
|
|
18
17
|
field (str): Field to search in (e.g., 'title', 'abstract', 'keywords').
|
|
19
18
|
|
|
20
19
|
Returns:
|
|
21
|
-
|
|
20
|
+
tuple[Library, Library]: Tuple containing (matching_library, non_matching_library).
|
|
22
21
|
"""
|
|
23
22
|
search_library = []
|
|
24
23
|
no_search_library = []
|
|
@@ -31,10 +30,10 @@ def search_keywords_core(keywords_list_list: List[List[str]], library: Library,
|
|
|
31
30
|
content = re.sub("}", "", content)
|
|
32
31
|
|
|
33
32
|
# All keywords from keyword_list_list[0] should be found in bib
|
|
34
|
-
flag = all(
|
|
33
|
+
flag = all(re.search(keyword, content, flags=re.I) for keyword in keywords_list_list[0])
|
|
35
34
|
if flag and (len(keywords_list_list) == 2):
|
|
36
35
|
# Any keywords from keyword_list_list[1] found in bib will results in False flag.
|
|
37
|
-
flag = not any(
|
|
36
|
+
flag = not any(re.search(keyword, content, flags=re.I) for keyword in keywords_list_list[1])
|
|
38
37
|
|
|
39
38
|
if flag:
|
|
40
39
|
search_library.append(entry)
|
|
@@ -76,32 +75,32 @@ class SearchInitialResult(BasicInput):
|
|
|
76
75
|
|
|
77
76
|
def main(
|
|
78
77
|
self,
|
|
79
|
-
search_field_list:
|
|
78
|
+
search_field_list: list[str],
|
|
80
79
|
path_initial: str,
|
|
81
80
|
library: Library,
|
|
82
81
|
keywords_type: str,
|
|
83
|
-
keywords_list_list:
|
|
82
|
+
keywords_list_list: list[list[str]],
|
|
84
83
|
combine_keywords: str,
|
|
85
84
|
output_prefix: str,
|
|
86
85
|
path_separate: str,
|
|
87
|
-
) ->
|
|
86
|
+
) -> tuple[list[str], dict[str, list[list[str]]], dict[str, int], Library]:
|
|
88
87
|
"""Main search method for processing search results.
|
|
89
88
|
|
|
90
89
|
Args:
|
|
91
|
-
search_field_list (
|
|
90
|
+
search_field_list (list[str]): list of fields to search.
|
|
92
91
|
path_initial (str): Path to initial directory.
|
|
93
92
|
library (Library): Bibliography library to search.
|
|
94
93
|
keywords_type (str): Type of keywords being searched.
|
|
95
|
-
keywords_list_list (
|
|
94
|
+
keywords_list_list (list[list[str]]): list of keyword lists.
|
|
96
95
|
combine_keywords (str): Combined keywords string.
|
|
97
96
|
output_prefix (str): Prefix for output files.
|
|
98
97
|
path_separate (str): Path to separate directory.
|
|
99
98
|
|
|
100
99
|
Returns:
|
|
101
|
-
|
|
100
|
+
tuple[list[str], dict[str, list[list[str]]], dict[str, int], Library]: Tuple containing error messages, field data, field numbers, and remaining library.
|
|
102
101
|
"""
|
|
103
102
|
error_pandoc_md_md, field_data_dict, no_search_library = [], {}, library
|
|
104
|
-
field_number_dict:
|
|
103
|
+
field_number_dict: dict[str, int] = {}
|
|
105
104
|
|
|
106
105
|
for field in search_field_list:
|
|
107
106
|
if len(no_search_library.entries) == 0:
|