pyeasyphd-0.4.42-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +5 -0
- pyeasyphd/data/templates/csl/apa-no-ampersand.csl +2183 -0
- pyeasyphd/data/templates/csl/apa.csl +2133 -0
- pyeasyphd/data/templates/csl/ieee.csl +512 -0
- pyeasyphd/data/templates/tex/Article.tex +38 -0
- pyeasyphd/data/templates/tex/Article_Header.tex +29 -0
- pyeasyphd/data/templates/tex/Article_Tail.tex +3 -0
- pyeasyphd/data/templates/tex/Beamer_Header.tex +79 -0
- pyeasyphd/data/templates/tex/Beamer_Tail.tex +14 -0
- pyeasyphd/data/templates/tex/Style.tex +240 -0
- pyeasyphd/data/templates/tex/TEVC_Header.tex +52 -0
- pyeasyphd/data/templates/tex/TEVC_Tail.tex +4 -0
- pyeasyphd/data/templates/tex/eisvogel.tex +1064 -0
- pyeasyphd/data/templates/tex/math.tex +201 -0
- pyeasyphd/data/templates/tex/math_commands.tex +677 -0
- pyeasyphd/data/templates/tex/nextaimathmacros.sty +681 -0
- pyeasyphd/main/__init__.py +6 -0
- pyeasyphd/main/basic_input.py +101 -0
- pyeasyphd/main/pandoc_md_to.py +380 -0
- pyeasyphd/main/python_run_md.py +320 -0
- pyeasyphd/main/python_run_tex.py +200 -0
- pyeasyphd/pyeasyphd.py +86 -0
- pyeasyphd/pyeasyphd.sublime-settings +100 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/scripts/__init__.py +34 -0
- pyeasyphd/scripts/_base.py +65 -0
- pyeasyphd/scripts/run_article_md.py +101 -0
- pyeasyphd/scripts/run_article_tex.py +94 -0
- pyeasyphd/scripts/run_beamer_tex.py +84 -0
- pyeasyphd/scripts/run_compare.py +71 -0
- pyeasyphd/scripts/run_format.py +62 -0
- pyeasyphd/scripts/run_generate.py +211 -0
- pyeasyphd/scripts/run_replace.py +34 -0
- pyeasyphd/scripts/run_search.py +251 -0
- pyeasyphd/tools/__init__.py +12 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +181 -0
- pyeasyphd/tools/generate/generate_html.py +166 -0
- pyeasyphd/tools/generate/generate_library.py +203 -0
- pyeasyphd/tools/generate/generate_links.py +400 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +398 -0
- pyeasyphd/tools/search/data.py +282 -0
- pyeasyphd/tools/search/search_base.py +146 -0
- pyeasyphd/tools/search/search_core.py +400 -0
- pyeasyphd/tools/search/search_keywords.py +229 -0
- pyeasyphd/tools/search/search_writers.py +350 -0
- pyeasyphd/tools/search/utils.py +190 -0
- pyeasyphd/utils/utils.py +99 -0
- pyeasyphd-0.4.42.dist-info/METADATA +33 -0
- pyeasyphd-0.4.42.dist-info/RECORD +53 -0
- pyeasyphd-0.4.42.dist-info/WHEEL +4 -0
- pyeasyphd-0.4.42.dist-info/licenses/LICENSE +674 -0
pyeasyphd/tools/generate/generate_links.py
@@ -0,0 +1,400 @@
+import json
+import os
+import subprocess
+from datetime import datetime
+
+from pyadvtools import standard_path
+
+
+class PaperLinksGenerator:
+    """Generate markdown files with paper links from JSON data."""
+
+    def __init__(
+        self,
+        full_json_c: str,
+        full_json_j: str,
+        full_json_k: str,
+        data_base_path: str,
+        keywords_category_name: str = "",
+        display_year_period: int = 10,
+    ):
+        """Initialize the generator with base paths.
+
+        Args:
+            full_json_c (str): Path to conferences JSON file.
+            full_json_j (str): Path to journals JSON file.
+            full_json_k (str): Path to keywords JSON file.
+            data_base_path (str): Path to data files directory.
+            keywords_category_name (str, optional): Category name for keywords. Defaults to "".
+            display_year_period (int, optional): Number of years to display. Defaults to 10.
+        """
+        self.full_json_c = full_json_c
+        self.full_json_j = full_json_j
+        self.full_json_k = full_json_k
+
+        self.data_base_path = standard_path(data_base_path)
+
+        # Process keyword category name and load data
+        keywords_category_name = keywords_category_name.lower().strip() if keywords_category_name else ""
+        category_prefix = f"{keywords_category_name}_" if keywords_category_name else ""
+        keywords_list = self._load_json_data("keywords").get(f"{category_prefix}keywords", [])
+
+        # Fall back to defaults when either the list or the category name is missing
+        if not keywords_list or not keywords_category_name:
+            keywords_list, keywords_category_name = [], ""
+
+        self.keywords_category_name = keywords_category_name
+        self.keywords_list: list[str] = keywords_list
+
+        self.display_year_period = display_year_period
+
+    def generate_yearly_links(self, cj: str, folder_name: str = os.path.join("data", "Yearly")) -> None:
+        """Generate yearly markdown table with paper links.
+
+        Args:
+            cj (str): Publication type - 'conferences' or 'journals'.
+            folder_name (str, optional): Output folder name. Defaults to "data/Yearly".
+        """
+        flags = self._get_yearly_flags(cj)
+        folder_flags = [f"{f}_all_months" for f in flags]
+
+        self._generate_links(cj, flags, folder_flags, folder_name)
+
+    def generate_monthly_links(self, folder_name: str = os.path.join("data", "Monthly")) -> None:
+        """Generate monthly markdown table with journal paper links.
+
+        Args:
+            folder_name (str, optional): Output folder name. Defaults to "data/Monthly".
+        """
+        cj = "Journals"
+
+        flags = ["All Months"]
+        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
+
+        self._generate_links(cj, flags, folder_flags, folder_name)
+
+    def generate_weekly_links(self, folder_name: str = os.path.join("data", "Weekly")) -> None:
+        """Generate weekly markdown table with journal paper links.
+
+        Args:
+            folder_name (str, optional): Output folder name. Defaults to "data/Weekly".
+        """
+        cj = "Journals"
+
+        flags = ["Current Issue", "Current Month"]
+        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
+
+        self._generate_links(cj, flags, folder_flags, folder_name)
+
+    def _generate_links(self, cj, flags, folder_flags, folder_name) -> None:
+        json_data = self._load_json_data(cj.lower())
+        if not json_data:
+            return None
+
+        # Publisher-level table
+        md_content = self._create_md_header_publisher(cj, flags)
+        table_rows = self._generate_table_rows_publisher(json_data, cj, folder_flags, folder_name)
+        if table_rows:
+            md_content.extend(table_rows)
+            self._write_md_file(md_content, folder_name, f"{cj}_Publisher.md")
+
+        # Abbreviation-level table
+        md_content = self._create_md_header_abbr(cj, flags)
+        table_rows = self._generate_table_rows_abbr(json_data, cj, folder_flags, folder_name)
+        if table_rows:
+            md_content.extend(table_rows)
+            self._write_md_file(md_content, folder_name, f"{cj}_Abbreviation.md")
+            self._convert_md_to_html(folder_name, f"{cj}_Abbreviation")
+
+        return None
+
+    def _convert_md_to_html(self, folder_name, file_name):
+        """Convert markdown file to HTML using pandoc."""
+        base_path = os.path.join(self.data_base_path, folder_name)
+        file_md = os.path.join(base_path, f"{file_name}.md")
+        file_html = os.path.join(base_path, f"{file_name}.html")
+
+        try:
+            cmd = f"pandoc {file_md} -o {file_html}"
+            subprocess.run(cmd.split(), check=True, capture_output=True, text=True)
+            os.remove(file_md)
+        except subprocess.CalledProcessError as e:
+            print("Pandoc error converting markdown to HTML:", e.stderr)
+
+    def generate_ieee_early_access_links(self, folder_name: str = os.path.join("data", "Weekly")) -> None:
+        """Generate markdown for IEEE Early Access papers."""
+        md_content = [
+            "# Papers from Early Access\n\n",
+            "|Publisher|**Current Month Papers**|**All Papers**|\n",
+            "|-|-|-|\n",
+        ]
+
+        links = self._get_ieee_links(folder_name)
+        if any(links):
+            md_content.append(f"|IEEE|{links[0]}|{links[1]}|\n")
+            self._write_md_file(md_content, folder_name, "Journals_Early_Access.md")
+
+        return None
+
+    def _load_json_data(self, file_name: str) -> dict:
+        """Load JSON data from file."""
+        try:
+            if file_name.lower().strip() == "conferences":
+                file_path = os.path.expanduser(self.full_json_c)
+            elif file_name.lower().strip() == "journals":
+                file_path = os.path.expanduser(self.full_json_j)
+            elif file_name.lower().strip() == "keywords":
+                file_path = os.path.expanduser(self.full_json_k)
+            else:
+                file_path = ""
+
+            if not os.path.exists(file_path):
+                return {}
+
+            with open(file_path, encoding="utf-8", newline="\n") as file:
+                return json.load(file)
+
+        except Exception as e:
+            print(f"Error loading {file_name}.json: {e}")
+            return {}
+
+    def _get_yearly_flags(self, cj: str) -> list[str]:
+        """Get yearly flags based on publication type."""
+        current_year = datetime.now().year
+        years = [str(y) for y in range(current_year - self.display_year_period, current_year)]
+        flags = sorted(years, reverse=True)
+
+        if cj.lower() == "conferences":
+            flags = [str(current_year), *flags]
+
+        return flags
+
+    def _create_md_header_publisher(self, cj: str, flags: list[str]) -> list[str]:
+        """Create markdown table header."""
+        return [
+            f"# Papers from {cj.title()} of Different Publishers\n\n",
+            f"| | {'|'.join(f'**{f}**' for f in flags)}|\n",
+            f"|-|{'|'.join('-' for _ in flags)}|\n",
+        ]
+
+    def _create_md_header_abbr(self, cj: str, flags: list[str]) -> list[str]:
+        """Create markdown table header."""
+        return [
+            f"# Papers from {cj.title()} of Different Publishers\n\n",
+            f"| |Publishers|{'|'.join(f'**{f}**' for f in flags)}|\n",
+            f"|-|-|{'|'.join('-' for _ in flags)}|\n",
+        ]
+
+    # Publisher-level rows
+    def _generate_table_rows_publisher(
+        self, json_data: dict, cj: str, folder_flags: list[str], period: str
+    ) -> list[str]:
+        """Generate markdown table rows."""
+        rows = []
+        idx = 1
+
+        for publisher in json_data:
+            cells = self._get_link_cells_publisher(publisher, cj, folder_flags, period)
+            if any(cells):
+                rows.append(f"|{idx}|{'|'.join(cells)}|\n")
+                idx += 1
+
+        return rows
+
+    def _get_link_cells_publisher(self, publisher: str, cj: str, folder_flags: list[str], period: str) -> list[str]:
+        """Get link cells for a publisher."""
+        cells = []
+
+        for flag in folder_flags:
+            link_path = os.path.join(period, cj, flag, publisher.lower(), f"{publisher.lower()}_link.html")
+            full_path = os.path.join(self.data_base_path, link_path)
+
+            if os.path.exists(full_path):
+                cells.append(f"[{publisher}]({link_path})")
+            else:
+                cells.append("")
+
+        return cells
+
+    # Abbreviation-level rows
+    def _generate_table_rows_abbr(self, json_data: dict, cj: str, folder_flags: list[str], period: str) -> list[str]:
+        """Generate markdown table rows."""
+        rows = []
+        idx = 1
+
+        for publisher in json_data:
+            if cj.lower() not in json_data[publisher]:
+                continue
+
+            for abbr in json_data[publisher][cj.lower()]:
+                cells = self._get_link_cells_abbr(publisher, abbr, cj, folder_flags, period)
+                if any(cells):
+                    rows.append(f"|{idx}|{publisher}|{'|'.join(cells)}|\n")
+                    idx += 1
+
+        return rows
+
+    def _get_link_cells_abbr(
+        self, publisher: str, abbr: str, cj: str, folder_flags: list[str], period: str
+    ) -> list[str]:
+        """Get link cells for an abbreviation."""
+        cells = []
+        for flag in folder_flags:
+            link_path = os.path.join(period, cj, flag, publisher.lower(), abbr, f"{abbr}.html")
+            full_path = os.path.join(self.data_base_path, link_path)
+            if os.path.exists(full_path):
+                cells.append(f"[{abbr}]({link_path})")
+            else:
+                cells.append("")
+
+        return cells
+
+    def _get_ieee_links(self, folder_name: str = os.path.join("data", "Weekly")) -> list[str]:
+        """Get IEEE Early Access links."""
+        links = []
+        link_paths = [
+            os.path.join(folder_name, "Journals_Early_Access", "current_year_current_month", "ieee", "ieee_link.html"),
+            os.path.join(folder_name, "Journals_Early_Access", "all_years_all_months", "ieee", "ieee_link.html"),
+        ]
+
+        for link_path in link_paths:
+            full_path = os.path.join(self.data_base_path, link_path)
+            if os.path.exists(full_path):
+                links.append(f"[IEEE Early Access]({link_path})")
+            else:
+                links.append("")
+
+        return links
+
+    def _write_md_file(self, content: list[str], period: str, file_name: str) -> None:
+        """Write markdown content to file."""
+        if len(content) == 0:
+            return None
+
+        output_dir = os.path.join(self.data_base_path, period)
+        os.makedirs(output_dir, exist_ok=True)
+
+        output_file = os.path.join(output_dir, file_name)
+        with open(output_file, "w", newline="\n") as f:
+            f.writelines(content)
+        print(f"Generated: {output_file}")
+
+        return None
+
+    def generate_keywords_links_weekly(self, cj: str, folder_name: str = os.path.join("data", "Weekly")):
+        flags = ["Current Issue", "Current Month"]
+        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
+
+        self._generate_keywords_links(cj, folder_name, flags, folder_flags)
+
+    def generate_keywords_links_monthly(self, cj: str, folder_name: str = os.path.join("data", "Monthly")):
+        flags = ["All Months"]
+        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]
+
+        self._generate_keywords_links(cj, folder_name, flags, folder_flags)
+
+    def generate_keywords_links_yearly(self, cj: str, folder_name: str = os.path.join("data", "Yearly")):
+        flags = self._get_yearly_flags(cj)
+        folder_flags = [f"{f}_all_months" for f in flags]
+
+        self._generate_keywords_links(cj, folder_name, flags, folder_flags)
+
+    def _generate_keywords_links(self, cj: str, folder_name: str, flags: list[str], folder_flags: list[str]):
+        json_data = self._load_json_data(cj.title())
+        if not json_data:
+            return None
+
+        keyword_publisher_abbr = self._process_keywords(cj, json_data)
+
+        all_data_list = ["# Keywords\n\n", "| |Keywords|Links|\n", "|-|-|-|\n"]
+        idx = 1
+        for keyword in self._default_or_customized_keywords(keyword_publisher_abbr):
+            data_list = [
+                f"# {keyword.title()}\n\n",
+                "|Publishers|Abbreviations|" + "|".join(flags) + "|\n",
+                "|-|-|" + "|".join(["-" for _ in flags]) + "|\n",
+            ]
+
+            for publisher in keyword_publisher_abbr[keyword]:
+                for abbr in keyword_publisher_abbr[keyword][publisher]:
+                    lines = []
+                    for ff in folder_flags:
+                        ll = os.path.join(folder_name, cj.title(), ff, publisher.lower(), abbr, f"{abbr}.html")
+                        if os.path.exists(os.path.join(self.data_base_path, ll)):
+                            lines.append(f"[Link]({ll})")
+                        else:
+                            lines.append("")
+
+                    if any(lines):
+                        data_list.append(f"|{publisher}|{abbr}|" + "|".join(lines) + "|\n")
+
+            if len(data_list) == 3:
+                continue
+
+            self._write_md_file(
+                data_list, os.path.join(folder_name, f"{cj.title()}_Keywords"), f"{keyword.replace(' ', '_')}.md"
+            )
+
+            # Convert the per-keyword page to HTML with pandoc
+            self._convert_md_to_html_keyword(folder_name, cj, keyword)
+
+            ll = os.path.join(folder_name, f"{cj.title()}_Keywords", f"{keyword.replace(' ', '_')}.html")
+            all_data_list.append(f"|{idx}|{keyword}|[Link]({ll})|\n")
+
+            idx += 1
+
+        category_postfix = f"_{self.keywords_category_name.title()}" if self.keywords_category_name else ""
+        self._write_md_file(all_data_list, folder_name, f"{cj.title()}_Keywords{category_postfix}.md")
+
+    def _default_or_customized_keywords(self, json_data):
+        keywords = list(json_data.keys())
+
+        # Keep only the customized keywords when a category is configured
+        if self.keywords_category_name and self.keywords_list:
+            _keywords = []
+            for keyword in self.keywords_list:
+                if keyword in keywords:
+                    _keywords.append(keyword)
+            return _keywords
+        else:
+            # Default: all keywords, sorted alphabetically
+            return sorted(keywords)
+
+    def _convert_md_to_html_keyword(self, folder_name, cj, keyword):
+        """Convert markdown file to HTML using pandoc."""
+        base_path = os.path.join(self.data_base_path, folder_name, f"{cj.title()}_Keywords")
+        file_md = os.path.join(base_path, f"{keyword.replace(' ', '_')}.md")
+        file_html = os.path.join(base_path, f"{keyword.replace(' ', '_')}.html")
+
+        try:
+            subprocess.run(["pandoc", file_md, "-o", file_html], check=True, capture_output=True, text=True)
+            os.remove(file_md)
+        except subprocess.CalledProcessError as e:
+            print(f"Pandoc conversion error: {e.stderr}")
+
+    def _process_keywords(self, cj: str, json_data: dict):
+        keyword_publisher_abbr = {}
+
+        for publisher in json_data:
+            for abbr in json_data[publisher].get(cj.lower(), {}):
+                keywords_dict = json_data[publisher][cj.lower()][abbr].get("keywords_dict", {})
+
+                # Clean and sort keywords
+                cleaned_keywords = {}
+                for category, words in keywords_dict.items():
+                    if category.strip():
+                        sorted_words = sorted({word.strip() for word in words if word.strip()})
+                        cleaned_keywords[category.strip()] = sorted_words
+
+                # Include category names alongside their keywords,
+                # flattening everything and removing duplicates
+                all_keywords = []
+                for category, words in cleaned_keywords.items():
+                    all_keywords.extend(words)
+                    all_keywords.append(category)
+                all_keywords = sorted(set(all_keywords))
+
+                for keyword in all_keywords:
+                    keyword_publisher_abbr.setdefault(keyword, {}).setdefault(publisher, []).append(abbr)
+
+        return keyword_publisher_abbr