pyeasyphd 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -0
- pyeasyphd/Main.sublime-menu +43 -0
- pyeasyphd/__init__.py +0 -0
- pyeasyphd/bib/__init__.py +1 -0
- pyeasyphd/bib/bibtexbase/__init__.py +7 -0
- pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
- pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
- pyeasyphd/bib/bibtexparser/__init__.py +47 -0
- pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
- pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
- pyeasyphd/bib/bibtexparser/library.py +207 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
- pyeasyphd/bib/bibtexparser/model.py +481 -0
- pyeasyphd/bib/bibtexparser/splitter.py +151 -0
- pyeasyphd/bib/core/__init__.py +18 -0
- pyeasyphd/bib/core/convert_library_to_library.py +31 -0
- pyeasyphd/bib/core/convert_library_to_str.py +199 -0
- pyeasyphd/bib/core/convert_str_to_library.py +34 -0
- pyeasyphd/bib/core/convert_str_to_str.py +27 -0
- pyeasyphd/main/__init__.py +17 -0
- pyeasyphd/main/basic_input.py +149 -0
- pyeasyphd/main/pandoc_md_to.py +361 -0
- pyeasyphd/main/python_run_bib.py +73 -0
- pyeasyphd/main/python_run_md.py +235 -0
- pyeasyphd/main/python_run_tex.py +149 -0
- pyeasyphd/main/python_writers.py +212 -0
- pyeasyphd/pyeasyphd.py +72 -0
- pyeasyphd/pyeasyphd.sublime-settings +235 -0
- pyeasyphd/pyeasyphd.sublime-syntax +5 -0
- pyeasyphd/tools/__init__.py +30 -0
- pyeasyphd/tools/compare/compare_bibs.py +234 -0
- pyeasyphd/tools/experiments_base.py +203 -0
- pyeasyphd/tools/format_save_bibs.py +178 -0
- pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
- pyeasyphd/tools/generate/generate_links.py +356 -0
- pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
- pyeasyphd/tools/replace/replace.py +81 -0
- pyeasyphd/tools/search/data.py +318 -0
- pyeasyphd/tools/search/search_base.py +118 -0
- pyeasyphd/tools/search/search_core.py +326 -0
- pyeasyphd/tools/search/search_keywords.py +227 -0
- pyeasyphd/tools/search/search_writers.py +288 -0
- pyeasyphd/tools/search/utils.py +152 -0
- pyeasyphd/tools/spider/process_spider_bib.py +247 -0
- pyeasyphd/tools/spider/process_spider_url.py +74 -0
- pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
- pyeasyphd/utils/utils.py +62 -0
- pyeasyphd-0.0.2.dist-info/METADATA +27 -0
- pyeasyphd-0.0.2.dist-info/RECORD +80 -0
- pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
# coding=utf-8
|
|
2
|
+
|
|
3
|
+
import json
import os
import subprocess
from datetime import datetime
from typing import Dict, List, Optional

from pyadvtools import standard_path
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PaperLinksGenerator:
    """Generate markdown/HTML link tables for papers described by JSON data.

    JSON metadata is read from ``json_base_path``; the generated markdown and
    HTML index files (and the per-paper link pages they point at) live under
    ``data_base_path``.
    """

    def __init__(
        self,
        json_base_path: str,
        data_base_path: str,
        default_publication_keywords: Optional[List[str]] = None,
        display_year_period: int = 10,
    ):
        """
        Initialize the generator with base paths.

        Args:
            json_base_path: Path to JSON files directory
            data_base_path: Path to data files directory
            default_publication_keywords: Keywords listed first in keyword
                tables. When None or empty, falls back to ``default_keywords``
                from ``keywords.json`` (if present and a list).
            display_year_period: Number of past years shown in yearly tables.
        """
        self.json_base_path = standard_path(json_base_path)
        self.data_base_path = standard_path(data_base_path)

        if (default_publication_keywords is None) or (len(default_publication_keywords) == 0):
            loaded = self._load_json_data("keywords").get("default_keywords", [])
            default_publication_keywords = loaded if isinstance(loaded, list) else []

        self.default_publication_keywords: List[str] = default_publication_keywords

        self.display_year_period = display_year_period

    def generate_yearly_links(self, cj: str, folder_name="data/Yearly") -> None:
        """
        Generate yearly markdown table with paper links.

        Args:
            cj: Publication type - 'conferences' or 'journals'
        """
        flags = self._get_yearly_flags(cj)
        folder_flags = [f"{f}_all_months" for f in flags]

        self._generate_links(cj, flags, folder_flags, folder_name)

    def generate_weekly_links(self, folder_name="data/Weekly") -> None:
        """Generate weekly markdown table with journal paper links."""
        cj = "Journals"

        flags = ["Current Issue", "Current Month", "All Months"]
        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]

        self._generate_links(cj, flags, folder_flags, folder_name)

    def _generate_links(self, cj, flags, folder_flags, folder_name) -> None:
        """Write the per-publisher and per-abbreviation link tables for *cj*."""
        # NOTE(review): this loads f"{cj.lower()}.json" while
        # _generate_keywords_links loads f"{cj.title()}.json" — confirm which
        # file-name casing actually exists on disk.
        json_data = self._load_json_data(cj.lower())
        if not json_data:
            return None

        # publisher-level table: one row per publisher
        md_content = self._create_md_header_publisher(cj, flags)
        table_rows = self._generate_table_rows_publisher(json_data, cj, folder_flags, folder_name)
        if table_rows:
            md_content.extend(table_rows)
        self._write_md_file(md_content, folder_name, f"{cj}_Publisher.md")

        # abbreviation-level table: one row per journal/conference abbreviation
        md_content = self._create_md_header_abbr(cj, flags)
        table_rows = self._generate_table_rows_abbr(json_data, cj, folder_flags, folder_name)
        if table_rows:
            md_content.extend(table_rows)
        self._write_md_file(md_content, folder_name, f"{cj}_Abbr.md")
        self._convert_md_to_html(folder_name, f"{cj}_Abbr")

        return None

    def _convert_md_to_html(self, folder_name, file_name):
        """Convert markdown file to HTML using pandoc, then remove the markdown.

        Bug fix: the old ``os.system`` call ignored pandoc's exit status, so a
        failed conversion still deleted the markdown source. The source file is
        now removed only when pandoc reports success, and the list-form
        subprocess invocation avoids shell quoting problems (e.g. spaces in
        paths).
        """
        base_path = os.path.join(self.data_base_path, f"{folder_name}")
        file_md = os.path.join(base_path, f"{file_name}.md")
        file_html = os.path.join(base_path, f"{file_name}.html")

        try:
            result = subprocess.run(["pandoc", file_md, "-o", file_html])
            if result.returncode == 0:
                os.remove(file_md)
        except Exception as e:
            print(f"Pandoc conversion error: {e}")

    def generate_ieee_early_access_links(self, folder_name="data/Weekly") -> None:
        """Generate markdown for IEEE Early Access papers."""
        md_content = [
            "# Papers from Early Access\n\n",
            "|Publisher|**Current Month Papers**|**All Papers**|\n",
            "|-|-|-|\n",
        ]

        # Bug fix: forward folder_name (it was previously dropped, so custom
        # folders were always checked against _get_ieee_links' own default).
        links = self._get_ieee_links(folder_name)
        if any(links):
            md_content.append(f"|IEEE|{links[0]}|{links[1]}|\n")
        self._write_md_file(md_content, folder_name, "Journals_Early_Access.md")

        return None

    def _load_json_data(self, file_name: str) -> Dict:
        """Load ``{file_name}.json`` from json_base_path; {} when absent or invalid."""
        try:
            file_path = os.path.join(self.json_base_path, f"{file_name}.json")
            if not os.path.exists(file_path):
                return {}

            with open(file_path, "r", encoding="utf-8") as file:
                return json.load(file)

        except Exception as e:
            # Best-effort: a corrupt/unreadable file is reported, not fatal.
            print(f"Error loading {file_name}.json: {e}")
            return {}

    def _get_yearly_flags(self, cj: str) -> List[str]:
        """Get yearly flags (newest first); conferences also include the current year."""
        current_year = datetime.now().year
        years = [str(y) for y in range(current_year - self.display_year_period, current_year)]
        flags = sorted(years, reverse=True)

        if cj.lower() == "conferences":
            flags = [str(current_year)] + flags

        return flags

    def _create_md_header_publisher(self, cj: str, flags: List[str]) -> List[str]:
        """Create markdown table header for the publisher-level table."""
        return [
            f"# Papers from {cj.title()} of Different Publishers\n\n",
            f"| | {'|'.join(f'**{f}**' for f in flags)}|\n",
            f"|-|{'|'.join('-' for _ in flags)}|\n",
        ]

    def _create_md_header_abbr(self, cj: str, flags: List[str]) -> List[str]:
        """Create markdown table header for the abbreviation-level table."""
        return [
            f"# Papers from {cj.title()} of Different Publishers\n\n",
            f"| |Publishers|{'|'.join(f'**{f}**' for f in flags)}|\n",
            f"|-|-|{'|'.join('-' for _ in flags)}|\n",
        ]

    # publisher
    def _generate_table_rows_publisher(
        self, json_data: Dict, cj: str, folder_flags: List[str], period: str
    ) -> List[str]:
        """Generate one markdown row per publisher that has at least one link."""
        rows = []
        idx = 1

        for publisher in json_data:
            cells = self._get_link_cells_publisher(publisher, cj, folder_flags, period)
            if any(cells):
                rows.append(f"|{idx}|{'|'.join(cells)}|\n")
                idx += 1

        return rows

    def _get_link_cells_publisher(self, publisher: str, cj: str, folder_flags: List[str], period: str) -> List[str]:
        """Return one markdown link (or "") per folder flag for a publisher."""
        cells = []

        for flag in folder_flags:
            link_path = f"{period}/{cj}/{flag}/{publisher.lower()}/{publisher.lower()}_link.html"
            full_path = os.path.join(self.data_base_path, link_path)

            # Only emit a link when the target page actually exists on disk.
            if os.path.exists(full_path):
                cells.append(f"[{publisher}]({link_path})")
            else:
                cells.append("")

        return cells

    # abbr
    def _generate_table_rows_abbr(self, json_data: Dict, cj: str, folder_flags: List[str], period: str) -> List[str]:
        """Generate one markdown row per abbreviation that has at least one link."""
        rows = []
        idx = 1

        for publisher in json_data:
            if cj.lower() not in json_data[publisher]:
                continue

            for abbr in json_data[publisher][cj.lower()]:
                cells = self._get_link_cells_abbr(publisher, abbr, cj, folder_flags, period)
                if any(cells):
                    rows.append(f"|{idx}|{publisher}|{'|'.join(cells)}|\n")
                    idx += 1

        return rows

    def _get_link_cells_abbr(
        self, publisher: str, abbr: str, cj: str, folder_flags: List[str], period: str
    ) -> List[str]:
        """Return one markdown link (or "") per folder flag for an abbreviation."""
        cells = []
        for flag in folder_flags:
            link_path = f"{period}/{cj}/{flag}/{publisher.lower()}/{abbr}/{abbr}.html"
            full_path = os.path.join(self.data_base_path, link_path)
            if os.path.exists(full_path):
                cells.append(f"[{abbr}]({link_path})")
            else:
                cells.append("")

        return cells

    def _get_ieee_links(self, folder_name="data/Weekly") -> List[str]:
        """Get IEEE Early Access links: [current-month cell, all-papers cell]."""
        links = []
        link_paths = [
            f"{folder_name}/Journals_Early_Access/current_year_current_month/ieee/ieee_link.html",
            f"{folder_name}/Journals_Early_Access/all_years_all_months/ieee/ieee_link.html",
        ]

        for link_path in link_paths:
            full_path = os.path.join(self.data_base_path, link_path)
            if os.path.exists(full_path):
                links.append(f"[IEEE Early Access]({link_path})")
            else:
                links.append("")

        return links

    def _write_md_file(self, content: List[str], period: str, file_name: str) -> None:
        """Write markdown content to ``data_base_path/period/file_name``.

        Empty content writes nothing; parent directories are created on demand.
        """
        if len(content) == 0:
            return None

        output_dir = os.path.join(self.data_base_path, period)
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, file_name)
        # encoding added for consistency with _load_json_data (was the
        # platform default, which breaks non-ASCII titles on some systems).
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(content)
        print(f"Generated: {output_file}")

        return None

    # keywords
    def generate_keywords_links_monthly(self, cj: str, folder_name="data/Weekly"):
        """Generate keyword link tables over the weekly (current-year) folders."""
        flags = ["Current Issue", "Current Month", "All Months"]
        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]

        self._generate_keywords_links(cj, folder_name, flags, folder_flags)

    def generate_keywords_links_yearly(self, cj: str, folder_name="data/Yearly"):
        """Generate keyword link tables over the yearly folders."""
        flags = self._get_yearly_flags(cj)
        folder_flags = [f"{f}_all_months" for f in flags]

        self._generate_keywords_links(cj, folder_name, flags, folder_flags)

    def _generate_keywords_links(self, cj: str, folder_name: str, flags: List[str], folder_flags: List[str]):
        """Write one HTML page per keyword plus an index table of all keywords."""
        # NOTE(review): loads f"{cj.title()}.json" while _generate_links loads
        # f"{cj.lower()}.json" — confirm which casing matches the files on disk.
        json_data = self._load_json_data(cj.title())
        if not json_data:
            return None

        keyword_publisher_abbr = self._process_keywords(cj, json_data)

        # Order: configured default keywords first, then the rest alphabetically.
        # (A dead re-assignment of default_pub_types was removed here.)
        pub_types = list(keyword_publisher_abbr.keys())
        default_pub_types = self.default_publication_keywords + sorted(
            list(set(pub_types) - set(self.default_publication_keywords))
        )

        all_data_list = ["# Keywords\n\n", "| |Keywords|Links|\n", "|-|-|-|\n"]
        idx = 1
        for keyword in sorted(pub_types, key=default_pub_types.index):
            data_list = [
                f"# {keyword.title()}\n\n",
                "|Publishers|Abbreviations|" + "|".join(flags) + "|\n",
                "|-|-|" + "|".join(["-" for _ in flags]) + "|\n",
            ]

            for publisher in keyword_publisher_abbr[keyword]:
                for abbr in keyword_publisher_abbr[keyword][publisher]:
                    lines = [
                        f"[Link]({folder_name}/{cj.title()}/{ff}/{publisher.lower()}/{abbr}/{abbr}.html)"
                        for ff in folder_flags
                    ]

                    if any(self._check_file_exists(ff, folder_name, cj, publisher, abbr) for ff in folder_flags):
                        data_list.append(f"|{publisher}|{abbr}|" + "|".join(lines) + "|\n")

            # Only the three header lines survived: nothing to show, skip keyword.
            if len(data_list) == 3:
                continue

            self._write_md_file(data_list, f"{folder_name}/{cj.title()}_Keywords", f"{keyword.replace(' ', '_')}.md")

            # Pandoc
            self._convert_md_to_html_keyword(folder_name, cj, keyword)

            all_data_list.append(
                f"|{idx}|{keyword}|[Link]({folder_name}/{cj.title()}_Keywords/{keyword.replace(' ', '_')}.html)|\n"
            )

            idx += 1

        self._write_md_file(all_data_list, f"{folder_name}", f"{cj.title()}_Keywords.md")

    def _check_file_exists(self, folder, folder_name, cj, publisher, abbr):
        """Check if HTML file exists for given parameters."""
        file_path = os.path.join(
            self.data_base_path, f"{folder_name}/{cj.title()}/{folder}/{publisher.lower()}/{abbr}/{abbr}.html"
        )
        return os.path.exists(file_path)

    def _convert_md_to_html_keyword(self, folder_name, cj, keyword):
        """Convert a per-keyword markdown file to HTML using pandoc.

        Same fix as _convert_md_to_html: the markdown source is deleted only
        when pandoc reports success.
        """
        base_path = os.path.join(self.data_base_path, f"{folder_name}/{cj.title()}_Keywords")
        file_md = os.path.join(base_path, f"{keyword.replace(' ', '_')}.md")
        file_html = os.path.join(base_path, f"{keyword.replace(' ', '_')}.html")

        try:
            result = subprocess.run(["pandoc", file_md, "-o", file_html])
            if result.returncode == 0:
                os.remove(file_md)
        except Exception as e:
            print(f"Pandoc conversion error: {e}")

    def _process_keywords(self, cj: str, json_data: dict):
        """Map each keyword (and category name) to {publisher: [abbr, ...]}."""
        keyword_publisher_abbr = {}

        for publisher in json_data:
            # .get guard: publishers lacking this publication type no longer
            # raise KeyError (consistent with _generate_table_rows_abbr).
            for abbr in json_data[publisher].get(cj.lower(), {}):

                keywords_dict = json_data[publisher][cj.lower()][abbr].get("keywords_dict", {})

                # Clean and sort keywords within each non-blank category
                cleaned_keywords = {}
                for category, words in keywords_dict.items():
                    if category.strip():
                        sorted_words = sorted(set([word.strip() for word in words if word.strip()]))
                        cleaned_keywords[category.strip()] = sorted_words

                # For category
                # Flatten keywords plus category names and remove duplicates
                all_keywords = []
                for category, words in cleaned_keywords.items():
                    all_keywords.extend(words)
                    all_keywords.append(category)
                all_keywords = sorted(set(all_keywords))

                for keyword in all_keywords:
                    keyword_publisher_abbr.setdefault(keyword, {}).setdefault(publisher, []).append(abbr)

        return keyword_publisher_abbr
|