pyeasyphd 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic. Click here for more details.

Files changed (80) hide show
  1. pyeasyphd/.python-version +1 -0
  2. pyeasyphd/Main.sublime-menu +43 -0
  3. pyeasyphd/__init__.py +0 -0
  4. pyeasyphd/bib/__init__.py +1 -0
  5. pyeasyphd/bib/bibtexbase/__init__.py +7 -0
  6. pyeasyphd/bib/bibtexbase/standardize/_base.py +36 -0
  7. pyeasyphd/bib/bibtexbase/standardize/default_data.py +97 -0
  8. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +54 -0
  9. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +38 -0
  10. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +310 -0
  11. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +35 -0
  12. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +34 -0
  13. pyeasyphd/bib/bibtexbase/standardize_bib.py +75 -0
  14. pyeasyphd/bib/bibtexparser/__init__.py +47 -0
  15. pyeasyphd/bib/bibtexparser/bibtex_format.py +87 -0
  16. pyeasyphd/bib/bibtexparser/exceptions.py +64 -0
  17. pyeasyphd/bib/bibtexparser/library.py +207 -0
  18. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +94 -0
  19. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +22 -0
  20. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +62 -0
  21. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +47 -0
  22. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +31 -0
  23. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +222 -0
  24. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +34 -0
  25. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +33 -0
  26. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +70 -0
  27. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +15 -0
  28. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +113 -0
  29. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +34 -0
  30. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +21 -0
  31. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +28 -0
  32. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +20 -0
  33. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +98 -0
  34. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +29 -0
  35. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +124 -0
  36. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +222 -0
  37. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +13 -0
  38. pyeasyphd/bib/bibtexparser/middlewares/utils.py +226 -0
  39. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +414 -0
  40. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +42 -0
  41. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +35 -0
  42. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +29 -0
  43. pyeasyphd/bib/bibtexparser/model.py +481 -0
  44. pyeasyphd/bib/bibtexparser/splitter.py +151 -0
  45. pyeasyphd/bib/core/__init__.py +18 -0
  46. pyeasyphd/bib/core/convert_library_to_library.py +31 -0
  47. pyeasyphd/bib/core/convert_library_to_str.py +199 -0
  48. pyeasyphd/bib/core/convert_str_to_library.py +34 -0
  49. pyeasyphd/bib/core/convert_str_to_str.py +27 -0
  50. pyeasyphd/main/__init__.py +17 -0
  51. pyeasyphd/main/basic_input.py +149 -0
  52. pyeasyphd/main/pandoc_md_to.py +361 -0
  53. pyeasyphd/main/python_run_bib.py +73 -0
  54. pyeasyphd/main/python_run_md.py +235 -0
  55. pyeasyphd/main/python_run_tex.py +149 -0
  56. pyeasyphd/main/python_writers.py +212 -0
  57. pyeasyphd/pyeasyphd.py +72 -0
  58. pyeasyphd/pyeasyphd.sublime-settings +235 -0
  59. pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  60. pyeasyphd/tools/__init__.py +30 -0
  61. pyeasyphd/tools/compare/compare_bibs.py +234 -0
  62. pyeasyphd/tools/experiments_base.py +203 -0
  63. pyeasyphd/tools/format_save_bibs.py +178 -0
  64. pyeasyphd/tools/generate/generate_from_bibs.py +447 -0
  65. pyeasyphd/tools/generate/generate_links.py +356 -0
  66. pyeasyphd/tools/py_run_bib_md_tex.py +378 -0
  67. pyeasyphd/tools/replace/replace.py +81 -0
  68. pyeasyphd/tools/search/data.py +318 -0
  69. pyeasyphd/tools/search/search_base.py +118 -0
  70. pyeasyphd/tools/search/search_core.py +326 -0
  71. pyeasyphd/tools/search/search_keywords.py +227 -0
  72. pyeasyphd/tools/search/search_writers.py +288 -0
  73. pyeasyphd/tools/search/utils.py +152 -0
  74. pyeasyphd/tools/spider/process_spider_bib.py +247 -0
  75. pyeasyphd/tools/spider/process_spider_url.py +74 -0
  76. pyeasyphd/tools/spider/process_spider_url_bib.py +62 -0
  77. pyeasyphd/utils/utils.py +62 -0
  78. pyeasyphd-0.0.2.dist-info/METADATA +27 -0
  79. pyeasyphd-0.0.2.dist-info/RECORD +80 -0
  80. pyeasyphd-0.0.2.dist-info/WHEEL +4 -0
@@ -0,0 +1,356 @@
1
+ # coding=utf-8
2
+
3
import json
import os
import subprocess
from datetime import datetime
from typing import Dict, List, Optional

from pyadvtools import standard_path
9
+
10
+
11
class PaperLinksGenerator:
    """Generate markdown/HTML index files with paper links from JSON data."""

    def __init__(
        self,
        json_base_path: str,
        data_base_path: str,
        default_publication_keywords: Optional[List[str]] = None,
        display_year_period: int = 10,
    ):
        """
        Initialize the generator with base paths.

        Args:
            json_base_path: Path to the directory holding the JSON metadata files.
            data_base_path: Path to the directory holding the generated data files.
            default_publication_keywords: Keywords listed first in keyword index
                tables. When None or empty, the list is read from
                ``keywords.json`` (key ``default_keywords``) under ``json_base_path``.
            display_year_period: Number of past years shown in yearly tables.
        """
        self.json_base_path = standard_path(json_base_path)
        self.data_base_path = standard_path(data_base_path)

        if not default_publication_keywords:
            x = self._load_json_data("keywords").get("default_keywords", [])
            default_publication_keywords = x if isinstance(x, list) else []

        self.default_publication_keywords: List[str] = default_publication_keywords
        self.display_year_period = display_year_period

    def generate_yearly_links(self, cj: str, folder_name: str = "data/Yearly") -> None:
        """
        Generate the yearly markdown tables with paper links.

        Args:
            cj: Publication type - 'conferences' or 'journals'.
            folder_name: Output folder, relative to ``data_base_path``.
        """
        flags = self._get_yearly_flags(cj)
        folder_flags = [f"{f}_all_months" for f in flags]

        self._generate_links(cj, flags, folder_flags, folder_name)

    def generate_weekly_links(self, folder_name: str = "data/Weekly") -> None:
        """Generate the weekly markdown tables with journal paper links."""
        cj = "Journals"

        flags = ["Current Issue", "Current Month", "All Months"]
        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]

        self._generate_links(cj, flags, folder_flags, folder_name)

    def _generate_links(self, cj: str, flags: List[str], folder_flags: List[str], folder_name: str) -> None:
        """Write the per-publisher and per-abbreviation link tables for *cj*."""
        json_data = self._load_json_data(cj.lower())
        if not json_data:
            return None

        # Table indexed by publisher.
        md_content = self._create_md_header_publisher(cj, flags)
        table_rows = self._generate_table_rows_publisher(json_data, cj, folder_flags, folder_name)
        if table_rows:
            md_content.extend(table_rows)
            self._write_md_file(md_content, folder_name, f"{cj}_Publisher.md")

        # Table indexed by abbreviation (converted to HTML afterwards).
        md_content = self._create_md_header_abbr(cj, flags)
        table_rows = self._generate_table_rows_abbr(json_data, cj, folder_flags, folder_name)
        if table_rows:
            md_content.extend(table_rows)
            self._write_md_file(md_content, folder_name, f"{cj}_Abbr.md")
            self._convert_md_to_html(folder_name, f"{cj}_Abbr")

        return None

    def _convert_md_to_html(self, folder_name: str, file_name: str) -> None:
        """Convert ``<file_name>.md`` under *folder_name* to HTML with pandoc.

        The markdown source is removed only after a successful conversion, so
        a failed pandoc run no longer destroys the generated content.
        """
        base_path = os.path.join(self.data_base_path, f"{folder_name}")
        file_md = os.path.join(base_path, f"{file_name}.md")
        file_html = os.path.join(base_path, f"{file_name}.html")

        try:
            # Argument list (shell=False) keeps paths containing spaces or
            # shell metacharacters safe; os.system would not.
            result = subprocess.run(["pandoc", file_md, "-o", file_html])
            if result.returncode == 0:
                os.remove(file_md)
            else:
                print(f"Pandoc conversion failed with code {result.returncode}: {file_md}")
        except Exception as e:
            print(f"Pandoc conversion error: {e}")

    def generate_ieee_early_access_links(self, folder_name: str = "data/Weekly") -> None:
        """Generate markdown for IEEE Early Access papers."""
        md_content = [
            "# Papers from Early Access\n\n",
            "|Publisher|**Current Month Papers**|**All Papers**|\n",
            "|-|-|-|\n",
        ]

        # Forward folder_name so a non-default folder is actually searched
        # (previously the helper always used its own default).
        links = self._get_ieee_links(folder_name)
        if any(links):
            md_content.append(f"|IEEE|{links[0]}|{links[1]}|\n")
            self._write_md_file(md_content, folder_name, "Journals_Early_Access.md")

        return None

    def _load_json_data(self, file_name: str) -> Dict:
        """Load ``<file_name>.json`` from ``json_base_path``; return {} on any failure."""
        file_path = os.path.join(self.json_base_path, f"{file_name}.json")
        if not os.path.exists(file_path):
            return {}

        try:
            with open(file_path, "r", encoding="utf-8") as file:
                return json.load(file)
        except Exception as e:
            # Best-effort: corrupt or unreadable JSON is reported, not fatal.
            print(f"Error loading {file_name}.json: {e}")
            return {}

    def _get_yearly_flags(self, cj: str) -> List[str]:
        """Return year strings (newest first) for the yearly tables.

        Covers the ``display_year_period`` years before the current year;
        conferences additionally include the current year itself.
        """
        current_year = datetime.now().year
        years = [str(y) for y in range(current_year - self.display_year_period, current_year)]
        flags = sorted(years, reverse=True)

        if cj.lower() == "conferences":
            flags = [str(current_year)] + flags

        return flags

    def _create_md_header_publisher(self, cj: str, flags: List[str]) -> List[str]:
        """Create the markdown table header for the per-publisher table."""
        return [
            f"# Papers from {cj.title()} of Different Publishers\n\n",
            f"| | {'|'.join(f'**{f}**' for f in flags)}|\n",
            f"|-|{'|'.join('-' for _ in flags)}|\n",
        ]

    def _create_md_header_abbr(self, cj: str, flags: List[str]) -> List[str]:
        """Create the markdown table header for the per-abbreviation table."""
        return [
            f"# Papers from {cj.title()} of Different Publishers\n\n",
            f"| |Publishers|{'|'.join(f'**{f}**' for f in flags)}|\n",
            f"|-|-|{'|'.join('-' for _ in flags)}|\n",
        ]

    # publisher
    def _generate_table_rows_publisher(
        self, json_data: Dict, cj: str, folder_flags: List[str], period: str
    ) -> List[str]:
        """Generate one markdown row per publisher that has at least one link."""
        rows = []
        idx = 1

        for publisher in json_data:
            cells = self._get_link_cells_publisher(publisher, cj, folder_flags, period)
            if any(cells):
                rows.append(f"|{idx}|{'|'.join(cells)}|\n")
                idx += 1

        return rows

    def _get_link_cells_publisher(self, publisher: str, cj: str, folder_flags: List[str], period: str) -> List[str]:
        """Return one link cell per flag; empty string where the HTML file is absent."""
        cells = []

        for flag in folder_flags:
            link_path = f"{period}/{cj}/{flag}/{publisher.lower()}/{publisher.lower()}_link.html"
            full_path = os.path.join(self.data_base_path, link_path)
            cells.append(f"[{publisher}]({link_path})" if os.path.exists(full_path) else "")

        return cells

    # abbr
    def _generate_table_rows_abbr(self, json_data: Dict, cj: str, folder_flags: List[str], period: str) -> List[str]:
        """Generate one markdown row per (publisher, abbreviation) with links."""
        rows = []
        idx = 1

        for publisher in json_data:
            if cj.lower() not in json_data[publisher]:
                continue

            for abbr in json_data[publisher][cj.lower()]:
                cells = self._get_link_cells_abbr(publisher, abbr, cj, folder_flags, period)
                if any(cells):
                    rows.append(f"|{idx}|{publisher}|{'|'.join(cells)}|\n")
                    idx += 1

        return rows

    def _get_link_cells_abbr(
        self, publisher: str, abbr: str, cj: str, folder_flags: List[str], period: str
    ) -> List[str]:
        """Return one link cell per flag for an abbreviation; "" where missing."""
        cells = []
        for flag in folder_flags:
            link_path = f"{period}/{cj}/{flag}/{publisher.lower()}/{abbr}/{abbr}.html"
            full_path = os.path.join(self.data_base_path, link_path)
            cells.append(f"[{abbr}]({link_path})" if os.path.exists(full_path) else "")

        return cells

    def _get_ieee_links(self, folder_name: str = "data/Weekly") -> List[str]:
        """Get IEEE Early Access links (current-month and all-time)."""
        links = []
        link_paths = [
            f"{folder_name}/Journals_Early_Access/current_year_current_month/ieee/ieee_link.html",
            f"{folder_name}/Journals_Early_Access/all_years_all_months/ieee/ieee_link.html",
        ]

        for link_path in link_paths:
            full_path = os.path.join(self.data_base_path, link_path)
            links.append(f"[IEEE Early Access]({link_path})" if os.path.exists(full_path) else "")

        return links

    def _write_md_file(self, content: List[str], period: str, file_name: str) -> None:
        """Write markdown lines to ``<data_base_path>/<period>/<file_name>``."""
        if not content:
            return None

        output_dir = os.path.join(self.data_base_path, period)
        os.makedirs(output_dir, exist_ok=True)

        output_file = os.path.join(output_dir, file_name)
        # Explicit UTF-8 to match the encoding used when reading the JSON.
        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(content)
        print(f"Generated: {output_file}")

        return None

    #
    def generate_keywords_links_monthly(self, cj: str, folder_name: str = "data/Weekly") -> None:
        """Generate the keyword index tables for the weekly data."""
        flags = ["Current Issue", "Current Month", "All Months"]
        folder_flags = [f"current_year_{f.replace(' ', '_').lower()}" for f in flags]

        self._generate_keywords_links(cj, folder_name, flags, folder_flags)

    def generate_keywords_links_yearly(self, cj: str, folder_name: str = "data/Yearly") -> None:
        """Generate the keyword index tables for the yearly data."""
        flags = self._get_yearly_flags(cj)
        folder_flags = [f"{f}_all_months" for f in flags]

        self._generate_keywords_links(cj, folder_name, flags, folder_flags)

    def _generate_keywords_links(self, cj: str, folder_name: str, flags: List[str], folder_flags: List[str]) -> None:
        """Write one markdown/HTML page per keyword plus a keyword index page."""
        # NOTE(review): this loads ``<Cj>.json`` (title case) while
        # _generate_links loads ``<cj>.json`` (lower case) — confirm both
        # spellings exist on case-sensitive filesystems.
        json_data = self._load_json_data(cj.title())
        if not json_data:
            return None

        keyword_publisher_abbr = self._process_keywords(cj, json_data)

        # Keywords from the default list come first, the rest alphabetically.
        pub_types = list(keyword_publisher_abbr.keys())
        default_pub_types = self.default_publication_keywords + sorted(
            set(pub_types) - set(self.default_publication_keywords)
        )

        all_data_list = ["# Keywords\n\n", "| |Keywords|Links|\n", "|-|-|-|\n"]
        idx = 1
        for keyword in sorted(pub_types, key=default_pub_types.index):
            data_list = [
                f"# {keyword.title()}\n\n",
                "|Publishers|Abbreviations|" + "|".join(flags) + "|\n",
                "|-|-|" + "|".join(["-" for _ in flags]) + "|\n",
            ]

            for publisher in keyword_publisher_abbr[keyword]:
                for abbr in keyword_publisher_abbr[keyword][publisher]:
                    lines = [
                        f"[Link]({folder_name}/{cj.title()}/{ff}/{publisher.lower()}/{abbr}/{abbr}.html)"
                        for ff in folder_flags
                    ]

                    if any(self._check_file_exists(ff, folder_name, cj, publisher, abbr) for ff in folder_flags):
                        data_list.append(f"|{publisher}|{abbr}|" + "|".join(lines) + "|\n")

            if len(data_list) == 3:  # header only, nothing to publish
                continue

            self._write_md_file(data_list, f"{folder_name}/{cj.title()}_Keywords", f"{keyword.replace(' ', '_')}.md")

            # Pandoc
            self._convert_md_to_html_keyword(folder_name, cj, keyword)

            all_data_list.append(
                f"|{idx}|{keyword}|[Link]({folder_name}/{cj.title()}_Keywords/{keyword.replace(' ', '_')}.html)|\n"
            )
            idx += 1

        self._write_md_file(all_data_list, f"{folder_name}", f"{cj.title()}_Keywords.md")
        return None

    def _check_file_exists(self, folder: str, folder_name: str, cj: str, publisher: str, abbr: str) -> bool:
        """Check whether the per-abbreviation HTML file exists."""
        file_path = os.path.join(
            self.data_base_path, f"{folder_name}/{cj.title()}/{folder}/{publisher.lower()}/{abbr}/{abbr}.html"
        )
        return os.path.exists(file_path)

    def _convert_md_to_html_keyword(self, folder_name: str, cj: str, keyword: str) -> None:
        """Convert a single keyword page to HTML using pandoc (delegates)."""
        self._convert_md_to_html(f"{folder_name}/{cj.title()}_Keywords", keyword.replace(" ", "_"))

    def _process_keywords(self, cj: str, json_data: dict) -> Dict:
        """Map each keyword to the publishers/abbreviations that declare it.

        Returns:
            ``{keyword: {publisher: [abbr, ...]}}`` built from each entry's
            ``keywords_dict``; category names count as keywords too.
        """
        keyword_publisher_abbr: Dict = {}

        for publisher in json_data:
            # .get guards publishers without an entry for this publication type
            # (same guard _generate_table_rows_abbr already applies).
            for abbr, info in json_data[publisher].get(cj.lower(), {}).items():
                keywords_dict = info.get("keywords_dict", {})

                # Strip whitespace, drop empties, de-duplicate per category.
                cleaned_keywords = {}
                for category, words in keywords_dict.items():
                    if category.strip():
                        cleaned_keywords[category.strip()] = sorted({w.strip() for w in words if w.strip()})

                # Flatten words and category names into one de-duplicated list.
                all_keywords = []
                for category, words in cleaned_keywords.items():
                    all_keywords.extend(words)
                    all_keywords.append(category)

                for keyword in sorted(set(all_keywords)):
                    keyword_publisher_abbr.setdefault(keyword, {}).setdefault(publisher, []).append(abbr)

        return keyword_publisher_abbr