pyeasyphd 0.4.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. pyeasyphd/.python-version +1 -0
  2. pyeasyphd/Main.sublime-menu +43 -0
  3. pyeasyphd/__init__.py +5 -0
  4. pyeasyphd/data/templates/csl/apa-no-ampersand.csl +2183 -0
  5. pyeasyphd/data/templates/csl/apa.csl +2133 -0
  6. pyeasyphd/data/templates/csl/ieee.csl +512 -0
  7. pyeasyphd/data/templates/tex/Article.tex +38 -0
  8. pyeasyphd/data/templates/tex/Article_Header.tex +29 -0
  9. pyeasyphd/data/templates/tex/Article_Tail.tex +3 -0
  10. pyeasyphd/data/templates/tex/Beamer_Header.tex +79 -0
  11. pyeasyphd/data/templates/tex/Beamer_Tail.tex +14 -0
  12. pyeasyphd/data/templates/tex/Style.tex +240 -0
  13. pyeasyphd/data/templates/tex/TEVC_Header.tex +52 -0
  14. pyeasyphd/data/templates/tex/TEVC_Tail.tex +4 -0
  15. pyeasyphd/data/templates/tex/eisvogel.tex +1064 -0
  16. pyeasyphd/data/templates/tex/math.tex +201 -0
  17. pyeasyphd/data/templates/tex/math_commands.tex +677 -0
  18. pyeasyphd/data/templates/tex/nextaimathmacros.sty +681 -0
  19. pyeasyphd/main/__init__.py +6 -0
  20. pyeasyphd/main/basic_input.py +101 -0
  21. pyeasyphd/main/pandoc_md_to.py +380 -0
  22. pyeasyphd/main/python_run_md.py +320 -0
  23. pyeasyphd/main/python_run_tex.py +200 -0
  24. pyeasyphd/pyeasyphd.py +86 -0
  25. pyeasyphd/pyeasyphd.sublime-settings +100 -0
  26. pyeasyphd/pyeasyphd.sublime-syntax +5 -0
  27. pyeasyphd/scripts/__init__.py +34 -0
  28. pyeasyphd/scripts/_base.py +65 -0
  29. pyeasyphd/scripts/run_article_md.py +101 -0
  30. pyeasyphd/scripts/run_article_tex.py +94 -0
  31. pyeasyphd/scripts/run_beamer_tex.py +84 -0
  32. pyeasyphd/scripts/run_compare.py +71 -0
  33. pyeasyphd/scripts/run_format.py +62 -0
  34. pyeasyphd/scripts/run_generate.py +211 -0
  35. pyeasyphd/scripts/run_replace.py +34 -0
  36. pyeasyphd/scripts/run_search.py +251 -0
  37. pyeasyphd/tools/__init__.py +12 -0
  38. pyeasyphd/tools/generate/generate_from_bibs.py +181 -0
  39. pyeasyphd/tools/generate/generate_html.py +166 -0
  40. pyeasyphd/tools/generate/generate_library.py +203 -0
  41. pyeasyphd/tools/generate/generate_links.py +400 -0
  42. pyeasyphd/tools/py_run_bib_md_tex.py +398 -0
  43. pyeasyphd/tools/search/data.py +282 -0
  44. pyeasyphd/tools/search/search_base.py +146 -0
  45. pyeasyphd/tools/search/search_core.py +400 -0
  46. pyeasyphd/tools/search/search_keywords.py +229 -0
  47. pyeasyphd/tools/search/search_writers.py +350 -0
  48. pyeasyphd/tools/search/utils.py +190 -0
  49. pyeasyphd/utils/utils.py +99 -0
  50. pyeasyphd-0.4.42.dist-info/METADATA +33 -0
  51. pyeasyphd-0.4.42.dist-info/RECORD +53 -0
  52. pyeasyphd-0.4.42.dist-info/WHEEL +4 -0
  53. pyeasyphd-0.4.42.dist-info/licenses/LICENSE +674 -0
@@ -0,0 +1,211 @@
1
+ import os
2
+
3
+ from pyeasyphd.tools import PaperLinksGenerator, generate_from_bibs_and_write
4
+ from pyeasyphd.utils.utils import is_last_week_of_month
5
+
6
+ from ._base import build_base_options, expand_paths
7
+
8
+
9
def run_generate_j_weekly(
    options: dict,
    path_weekly_docs: str,
    keywords_category_names: list[str],
    path_spidering_bibs: str,
    path_conf_j_jsons: str,
):
    """Generate weekly journal pages and their link indexes.

    Args:
        options: Caller overrides merged on top of the default base options.
        path_weekly_docs: Root directory of the weekly documentation output.
        keywords_category_names: Keyword category names to build link pages for.
        path_spidering_bibs: Root directory holding the spidering bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON configuration files.
    """
    # Normalize user-supplied paths (e.g. expand "~")
    path_weekly_docs, path_spidering_bibs = expand_paths(path_weekly_docs, path_spidering_bibs)

    # Merge caller options on top of the defaults (caller wins)
    options_ = build_base_options([], [], ["arXiv"], [], path_conf_j_jsons)
    options_.update(options)

    # Loop-invariant paths for the generation phase
    path_storage = os.path.join(path_spidering_bibs, "spider_j")
    output_basename = os.path.join("data", "Weekly")
    path_output = os.path.expanduser(os.path.join(path_weekly_docs, output_basename, "Journals"))

    # First generate per-journal data, then combine it, for both time windows
    for gc in ("generate_data", "combine_data"):
        for flag in ("current_issue", "current_month"):
            generate_from_bibs_and_write(
                path_storage, path_output, output_basename, "Journals", gc, "current_year", flag, options_
            )

    # Build the weekly link pages for every keyword category
    full_json_c = options_["full_json_c"]
    full_json_j = options_["full_json_j"]
    full_json_k = options_["full_json_k"]
    for keywords_category_name in keywords_category_names:
        generator = PaperLinksGenerator(full_json_c, full_json_j, full_json_k, path_weekly_docs, keywords_category_name)
        generator.generate_weekly_links(output_basename)
        generator.generate_keywords_links_weekly("Journals", output_basename)
45
+
46
+
47
def run_generate_j_e_weekly(
    options: dict,
    path_weekly_docs: str,
    keywords_category_names: list[str],
    path_spidering_bibs: str,
    path_conf_j_jsons: str,
):
    """Generate weekly early-access journal pages and their link indexes.

    Args:
        options: Caller overrides merged on top of the default base options.
        path_weekly_docs: Root directory of the weekly documentation output.
        keywords_category_names: Keyword category names to build link pages for.
        path_spidering_bibs: Root directory holding the spidering bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON configuration files.
    """
    # Normalize user-supplied paths
    path_weekly_docs, path_spidering_bibs = expand_paths(path_weekly_docs, path_spidering_bibs)

    # Merge caller options on top of the defaults, then force early-access mode
    options_ = build_base_options([], [], ["arXiv"], [], path_conf_j_jsons)
    options_.update(options)
    options_["early_access"] = True

    # Loop-invariant paths for the generation phase
    path_storage = os.path.join(path_spidering_bibs, "spider_j_e")
    output_basename = os.path.join("data", "Weekly")
    path_output = os.path.expanduser(os.path.join(path_weekly_docs, output_basename, "Journals"))

    for gc in ("generate_data", "combine_data"):
        # Current-year pass restricted to the current month
        generate_from_bibs_and_write(
            path_storage, path_output, output_basename, "Journals", gc, "current_year", "current_month", options_
        )
        # Full historical pass across all years and months
        generate_from_bibs_and_write(
            path_storage, path_output, output_basename, "Journals", gc, "all_years", "all_months", options_
        )

    # Build the early-access link pages for every keyword category
    full_json_c = options_["full_json_c"]
    full_json_j = options_["full_json_j"]
    full_json_k = options_["full_json_k"]
    for keywords_category_name in keywords_category_names:
        generator = PaperLinksGenerator(full_json_c, full_json_j, full_json_k, path_weekly_docs, keywords_category_name)
        generator.generate_ieee_early_access_links(output_basename)
        generator.generate_keywords_links_weekly("Journals", output_basename)
90
+
91
+
92
def run_generate_j_monthly(
    options: dict,
    path_monthly_docs: str,
    keywords_category_names: list[str],
    path_spidering_bibs: str,
    path_conf_j_jsons: str,
):
    """Generate monthly journal pages and their link indexes.

    The monthly aggregation is only produced during the last week of a month.

    Args:
        options: Caller overrides merged on top of the default base options.
        path_monthly_docs: Root directory of the monthly documentation output.
        keywords_category_names: Keyword category names to build link pages for.
        path_spidering_bibs: Root directory holding the spidering bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON configuration files.
    """
    # Normalize user-supplied paths
    path_monthly_docs, path_spidering_bibs = expand_paths(path_monthly_docs, path_spidering_bibs)

    # Merge caller options on top of the defaults (caller wins)
    options_ = build_base_options([], [], ["arXiv"], [], path_conf_j_jsons)
    options_.update(options)

    # Loop-invariant paths for the generation phase
    path_storage = os.path.join(path_spidering_bibs, "spider_j")
    output_basename = os.path.join("data", "Monthly")
    path_output = os.path.expanduser(os.path.join(path_monthly_docs, output_basename, "Journals"))

    for gc in ("generate_data", "combine_data"):
        # Skip generation unless we are in the last week of the month
        if not is_last_week_of_month():
            continue
        generate_from_bibs_and_write(
            path_storage, path_output, output_basename, "Journals", gc, "current_year", "all_months", options_
        )

    # Build the monthly link pages for every keyword category
    full_json_c = options_["full_json_c"]
    full_json_j = options_["full_json_j"]
    full_json_k = options_["full_json_k"]
    for keywords_category_name in keywords_category_names:
        generator = PaperLinksGenerator(
            full_json_c, full_json_j, full_json_k, path_monthly_docs, keywords_category_name
        )
        generator.generate_monthly_links(output_basename)
        generator.generate_keywords_links_monthly("Journals", output_basename)
134
+
135
+
136
def run_generate_j_yearly(
    options: dict,
    path_yearly_docs: str,
    keywords_category_names: list[str],
    path_spidered_bibs: str,
    path_conf_j_jsons: str,
    year_list: list[str],
):
    """Generate yearly journal pages and link indexes for the given years.

    Args:
        options: Caller overrides merged on top of the default base options.
        path_yearly_docs: Root directory of the yearly documentation output.
        keywords_category_names: Keyword category names to build link pages for.
        path_spidered_bibs: Root directory holding the spidered bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON configuration files.
        year_list: Years to generate pages for, e.g. ["2024", "2023"].
    """
    # Normalize user-supplied paths
    path_yearly_docs, path_spidered_bibs = expand_paths(path_yearly_docs, path_spidered_bibs)

    # Merge caller options on top of the defaults (caller wins)
    options_ = build_base_options([], [], ["arXiv"], [], path_conf_j_jsons)
    options_.update(options)

    # Loop-invariant paths for the generation phase
    path_storage = os.path.join(path_spidered_bibs, "Journals")
    output_basename = os.path.join("data", "Yearly")
    path_output = os.path.expanduser(os.path.join(path_yearly_docs, output_basename, "Journals"))

    # First generate per-journal data, then combine it, once per requested year
    for gc in ("generate_data", "combine_data"):
        for year in year_list:
            generate_from_bibs_and_write(
                path_storage, path_output, output_basename, "Journals", gc, [year], "all_months", options_
            )

    # Build the yearly link pages for every keyword category
    full_json_c = options_["full_json_c"]
    full_json_j = options_["full_json_j"]
    full_json_k = options_["full_json_k"]
    for keywords_category_name in keywords_category_names:
        generator = PaperLinksGenerator(full_json_c, full_json_j, full_json_k, path_yearly_docs, keywords_category_name)
        generator.generate_yearly_links("Journals", output_basename)
        generator.generate_keywords_links_yearly("Journals", output_basename)
173
+
174
+
175
def run_generate_c_yearly(
    options: dict,
    path_yearly_docs: str,
    keywords_category_names: list[str],
    path_spidered_bibs: str,
    path_conf_j_jsons: str,
    year_list: list[str],
):
    """Generate yearly conference pages and link indexes for the given years.

    Args:
        options: Caller overrides merged on top of the default base options.
        path_yearly_docs: Root directory of the yearly documentation output.
        keywords_category_names: Keyword category names to build link pages for.
        path_spidered_bibs: Root directory holding the spidered bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON configuration files.
        year_list: Years to generate pages for, e.g. ["2025", "2024"].
    """
    # Normalize user-supplied paths
    path_yearly_docs, path_spidered_bibs = expand_paths(path_yearly_docs, path_spidered_bibs)

    # Merge caller options on top of the defaults (caller wins)
    options_ = build_base_options([], [], ["arXiv"], [], path_conf_j_jsons)
    options_.update(options)

    # Loop-invariant paths for the generation phase
    path_storage = os.path.join(path_spidered_bibs, "Conferences")
    output_basename = os.path.join("data", "Yearly")
    path_output = os.path.expanduser(os.path.join(path_yearly_docs, output_basename, "Conferences"))

    # First generate per-conference data, then combine it, once per requested year
    for gc in ("generate_data", "combine_data"):
        for year in year_list:
            generate_from_bibs_and_write(
                path_storage, path_output, output_basename, "Conferences", gc, [year], "all_months", options_
            )

    # Build the yearly link pages for every keyword category
    full_json_c = options_["full_json_c"]
    full_json_j = options_["full_json_j"]
    full_json_k = options_["full_json_k"]
    for keywords_category_name in keywords_category_names:
        generator = PaperLinksGenerator(full_json_c, full_json_j, full_json_k, path_yearly_docs, keywords_category_name)
        generator.generate_yearly_links("Conferences", output_basename)
        generator.generate_keywords_links_yearly("Conferences", output_basename)
@@ -0,0 +1,34 @@
1
+ from pybibtexer.tools import replace_to_standard_cite_keys
2
+
3
+ from ._base import build_base_options, expand_paths
4
+
5
+
6
def run_replace_to_standard_cite_keys(
    full_tex_md: str, full_bib: str, path_output: str, path_conf_j_jsons: str, options: dict | None = None
) -> None:
    """Replace citation keys in a TeX/Markdown document with standardized versions.

    Normalizes the citation keys found in the document against the BibTeX
    bibliography according to configuration standards, then writes the
    processed files to the output directory.

    Args:
        full_tex_md: Path to the TeX or Markdown file containing citations.
        full_bib: Path to the BibTeX bibliography file.
        path_output: Output directory path for the processed files.
        path_conf_j_jsons: Path to the journal configuration JSON files.
        options: Optional configuration overrides for citation processing.

    Returns:
        None: Results are written to the output directory.
    """
    # Normalize user-supplied paths
    full_tex_md, full_bib, path_output = expand_paths(full_tex_md, full_bib, path_output)

    # Defaults first, then caller overrides win
    options_ = build_base_options([], [], [], [], path_conf_j_jsons)
    options_.update(options or {})

    replace_to_standard_cite_keys(full_tex_md, full_bib, path_output, options=options_)
@@ -0,0 +1,251 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ from pyadvtools import transform_to_data_list
5
+ from pybibtexer.tools import compare_bibs_with_zotero
6
+
7
+ from pyeasyphd.tools import Searchkeywords
8
+
9
+ from ._base import build_base_options, build_search_options, expand_path, expand_paths
10
+
11
+
12
def run_search_for_screen(
    acronym: str, year: int, title: str, path_spidered_bibs: str, path_spidering_bibs: str, path_conf_j_jsons: str
) -> None:
    """Search for a paper title and print the matches on screen.

    Args:
        acronym: Conference/journal acronym restricting the search.
        year: Publication year filter; 0 searches every year.
        title: Paper title used as the search keyword.
        path_spidered_bibs: Path to the spidered bibliography files.
        path_spidering_bibs: Path to the spidering bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON files.
    """
    # A year of 0 disables year filtering (empty list = all years)
    search_year_list = [] if year == 0 else [str(year)]

    # Normalize user-supplied paths
    path_spidered_bibs, path_spidering_bibs, path_conf_j_jsons = expand_paths(
        path_spidered_bibs, path_spidering_bibs, path_conf_j_jsons
    )

    # Base options restricted to the given acronym, excluding arXiv
    options = build_base_options(
        include_publisher_list=[],
        include_abbr_list=[acronym],
        exclude_publisher_list=["arXiv"],
        exclude_abbr_list=[],
        path_conf_j_jsons=path_conf_j_jsons,
    )
    # Search options: print to screen, use the title as the keyword group
    options.update(
        build_search_options(
            print_on_screen=True,
            search_year_list=search_year_list,
            keywords_type="Temp",
            keywords_list_list=[[title]],
        )
    )

    # Search both the spidered and the spidering bibliography sources
    _execute_searches(options, "", path_spidered_bibs, path_spidering_bibs, True, True)

    return None
56
+
57
+
58
def run_search_for_files(
    keywords_type: str,
    keywords_list_list: list[list[str]],
    path_main_output: str,
    path_spidered_bibs: str,
    path_spidering_bibs: str,
    path_conf_j_jsons: str,
    search_in_spidered_bibs: bool = False,
    search_in_spidering_bibs: bool = True,
    options: dict | None = None,
) -> None:
    """Search bibliographies with custom keywords and save the results to files.

    Args:
        keywords_type: Category name for the search keywords (used to organize results).
        keywords_list_list: Nested keyword lists; each inner list is one search group.
        path_main_output: Main output directory for search results.
        path_spidered_bibs: Path to the spidered bibliography files.
        path_spidering_bibs: Path to the spidering bibliography files.
        path_conf_j_jsons: Path to the conferences/journals JSON files.
        search_in_spidered_bibs: Whether to search the spidered bibliographies.
        search_in_spidering_bibs: Whether to search the spidering bibliographies.
        options: Additional search options overriding the defaults.
    """
    # Normalize user-supplied paths
    path_main_output = expand_path(path_main_output)
    path_spidered_bibs, path_spidering_bibs, path_conf_j_jsons = expand_paths(
        path_spidered_bibs, path_spidering_bibs, path_conf_j_jsons
    )

    # Defaults: all publishers/abbreviations except arXiv
    options_ = build_base_options(
        include_publisher_list=[],
        include_abbr_list=[],
        exclude_publisher_list=["arXiv"],
        exclude_abbr_list=[],
        path_conf_j_jsons=path_conf_j_jsons,
    )
    # Search options: file output only, no year filtering
    options_.update(
        build_search_options(
            print_on_screen=False,
            search_year_list=[],
            keywords_type=keywords_type,
            keywords_list_list=keywords_list_list,
        )
    )
    # Caller-supplied options win over the defaults
    options_.update(options or {})

    # Run the searches across the selected bibliography sources
    _execute_searches(
        options_,
        path_main_output,
        path_spidered_bibs,
        path_spidering_bibs,
        search_in_spidered_bibs,
        search_in_spidering_bibs,
    )

    return None
122
+
123
+
124
+ def _execute_searches(
125
+ options: dict[str, Any],
126
+ path_main_output: str,
127
+ path_spidered_bibs: str,
128
+ path_spidering_bibs: str,
129
+ search_in_spidered_bibs: bool = False,
130
+ search_in_spidering_bibs: bool = True,
131
+ ) -> None:
132
+ """Execute searches across different bibliography sources.
133
+
134
+ Args:
135
+ options: Search configuration options
136
+ path_main_output: Base path for search results output
137
+ path_spidered_bibs: Path to spidered bibliography files
138
+ path_spidering_bibs: Path to spidering bibliography files
139
+ search_in_spidered_bibs: Whether to search in spidered bibliography files
140
+ search_in_spidering_bibs: Whether to search in spidering bibliography files
141
+ """
142
+ # Search in spidered bibliographies (Conferences and Journals)
143
+ # If enabled, search through completed/conference and journal bibliographies
144
+ if search_in_spidered_bibs:
145
+ for cj in ["Conferences", "Journals"]:
146
+ # Construct path to stored bibliography files for conferences/journals
147
+ path_storage = os.path.join(path_spidered_bibs, cj)
148
+ # Construct output path for search results
149
+ path_output = os.path.join(path_main_output, "Search_spidered_bib", cj)
150
+ # Execute search with given options and paths
151
+ Searchkeywords(path_storage, path_output, options).run()
152
+
153
+ # Search in spidering bibliographies (Journals and Journals Early Access)
154
+ # If enabled, search through actively spidering/in-progress journal bibliographies
155
+ if search_in_spidering_bibs:
156
+ for je in ["spider_j", "spider_j_e"]:
157
+ # Construct path to spidering bibliography files (journals and early access)
158
+ path_storage = os.path.join(path_spidering_bibs, je)
159
+ # Construct output path for search results
160
+ path_output = os.path.join(path_main_output, "Search_spidering_bib", je)
161
+ # Execute search with given options and paths
162
+ Searchkeywords(path_storage, path_output, options).run()
163
+
164
+ return None
165
+
166
+
167
def run_compare_after_search(
    zotero_bib: str, keywords_type: str, path_main_output: str, path_conf_j_jsons: str
) -> None:
    """Compare local search results with a Zotero bibliography.

    Collects all .bib content produced by previous search runs, compares it
    against the Zotero bibliography, and writes a comparison report to the
    "Compared" subdirectory of the main output path.

    Args:
        zotero_bib: Path to the Zotero bibliography file.
        keywords_type: Category name of the search keywords that were used.
        path_main_output: Main output directory for search results and comparison.
        path_conf_j_jsons: Path to the conferences/journals JSON files.
    """
    # Expand and normalize file paths
    zotero_bib = expand_path(zotero_bib)
    path_main_output = expand_path(path_main_output)
    path_conf_j_jsons = expand_path(path_conf_j_jsons)

    # Configure comparison options (no keyword filtering is needed here)
    options = {
        **build_base_options(
            include_publisher_list=[],
            include_abbr_list=[],
            exclude_publisher_list=["arXiv"],
            exclude_abbr_list=[],
            path_conf_j_jsons=path_conf_j_jsons,
        ),
        **build_search_options(
            print_on_screen=False, search_year_list=[], keywords_type=keywords_type, keywords_list_list=[]
        ),
    }

    # Gather bibliography content produced by the local search runs
    download_bib = _download_bib_from_local(path_main_output, keywords_type)

    # Run the comparison and write the report
    path_output = os.path.join(path_main_output, "Compared")
    compare_bibs_with_zotero(zotero_bib, download_bib, path_output, options)

    return None
203
+
204
+
205
+ def _generate_data_list(path_output: str, folder_name: str, keywords_type: str) -> list[str]:
206
+ """Extract bibliography data content from files in specified folder structure.
207
+
208
+ Args:
209
+ path_output: Base output path for search results
210
+ folder_name: Specific folder name within the output structure
211
+ keywords_type: Category name for the search keywords used
212
+
213
+ Returns:
214
+ List of bibliography data content extracted from .bib files in the specified folders
215
+ """
216
+ data_list = []
217
+
218
+ # Extract data from both title and abstract bibliography folders
219
+ for bib_type in ["title-bib-zotero", "abstract-bib-zotero"]:
220
+ folder_path = os.path.join(path_output, f"{folder_name}-Separate", "article", keywords_type, bib_type)
221
+
222
+ # Extract bibliography data content if folder exists
223
+ if os.path.exists(folder_path):
224
+ data_list.extend(transform_to_data_list(folder_path, ".bib"))
225
+
226
+ return data_list
227
+
228
+
229
def _download_bib_from_local(path_output: str, keywords_type: str) -> list[str]:
    """Gather bibliography content from every local search-result directory.

    Args:
        path_output: Base output path containing the search results.
        keywords_type: Category name for the search keywords used.

    Returns:
        Combined bibliography content from all .bib files in the search results.
    """
    # Spidered (Conferences/Journals) first, then spidering (journal spiders)
    sources = [("Search_spidered_bib", cj) for cj in ("Conferences", "Journals")]
    sources += [("Search_spidering_bib", je) for je in ("spider_j", "spider_j_e")]

    data_list: list[str] = []
    for parent, child in sources:
        data_list.extend(_generate_data_list(path_output, os.path.join(parent, child), keywords_type))

    return data_list
@@ -0,0 +1,12 @@
1
"""Tools module for PyEasyPhD advanced functionality.

This module provides advanced tools for bibliography processing,
search functionality, and content generation.
"""

# Public API of the tools package
__all__ = ["PyRunBibMdTex", "Searchkeywords", "generate_from_bibs_and_write", "PaperLinksGenerator"]

from .generate.generate_from_bibs import generate_from_bibs_and_write
from .generate.generate_links import PaperLinksGenerator
from .py_run_bib_md_tex import PyRunBibMdTex
from .search.search_keywords import Searchkeywords
@@ -0,0 +1,181 @@
1
+ import os
2
+ import re
3
+ from typing import Any
4
+
5
+ from pyadvtools import standard_path, write_list
6
+ from pybibtexer.tools.experiments_base import generate_standard_publisher_abbr_options_dict
7
+
8
+ from ...main import PandocMdTo
9
+ from .generate_html import generate_html_content, generate_html_from_bib_data
10
+ from .generate_library import generate_library_by_filters
11
+
12
+
13
def preparation(
    path_storage: str,
    path_output: str,
    output_basename: str,
    pub_type: str,
    issue_or_month_flag: str | list[str] = "current_issue",
    year_flag: str | list[str] = "current_year",
    options: dict[str, Any] | None = None,
):
    """Prepare paths and flags for data generation.

    Args:
        path_storage (str): Path to storage directory.
        path_output (str): Path to output directory.
        output_basename (str): Base name for output files.
        pub_type (str): Type of publication.
        issue_or_month_flag (str | list[str], optional): Issue or month flag. Defaults to "current_issue".
        year_flag (str | list[str], optional): Year flag. Defaults to "current_year".
        options (dict[str, Any], optional): Additional options. Defaults to {}.

    Examples:
        | | current_issue | current_month | all_months |
        |--------------|---------------|---------------|------------|
        | current_year | YES | YES | YES |
        | all_years | NO | NO | YES |
        | given_years | NO | NO | YES |

        given_years = ["2020", "2025"]

    Returns:
        tuple[str, str, bool]: Returns (path_root, path_output, combine_flag).
    """
    if options is None:
        options = {}

    # Normalize the storage and output paths
    path_storage = standard_path(path_storage)
    path_output = standard_path(path_output)

    early_access = options.get("early_access", False)

    # Collapse list-valued flags into a single dash-joined token
    if isinstance(year_flag, list):
        yy = "-".join(year_flag)
    else:
        yy = year_flag
    if isinstance(issue_or_month_flag, list):
        im = "-".join(issue_or_month_flag)
    else:
        im = issue_or_month_flag
    suffix = f"{yy}_{im}"

    # Early-access output lives in a parallel "_Early_Access" tree
    if early_access:
        base_path = os.path.join(output_basename, f"{pub_type.title()}_Early_Access", suffix)
        path_output = os.path.join(path_output + "_Early_Access", suffix)
    else:
        base_path = os.path.join(output_basename, pub_type.title(), suffix)
        path_output = os.path.join(path_output, suffix)

    # "absolute_path" mode exposes base_path as the link root; relative mode uses ""
    if options.get("absolute_or_relative_path", "absolute_path") == "absolute_path":
        path_root = base_path
    else:
        path_root = ""

    # Combine when either: early access on a non-"all_years" run, or a
    # current-year run scoped to a single issue/month token other than "all_months"
    combine_for_early = early_access and (year_flag != "all_years")
    combine_for_current = (
        year_flag == "current_year"
        and (not isinstance(issue_or_month_flag, list))
        and (issue_or_month_flag != "all_months")
    )
    combine_flag = combine_for_early or combine_for_current

    return path_root, path_output, combine_flag
75
+
76
+
77
def generate_from_bibs_and_write(
    path_storage: str,
    path_output: str,
    output_basename: str,
    pub_type: str,
    generate_or_combine: str,
    year_flag: str | list[str] = "current_year",
    issue_or_month_flag: str | list[str] = "current_issue",
    options: dict[str, Any] | None = None,
) -> None:
    """Generate or combine data from bibliographies.

    Args:
        path_storage (str): Path to storage directory.
        path_output (str): Path to output directory.
        output_basename (str): Base name for output files.
        pub_type (str): Type of publication (e.g. "Journals", "Conferences").
        generate_or_combine (str): Either "generate_data" or "combine_data";
            any other value is a no-op.
        year_flag (str | list[str], optional): Flag for year selection. Defaults to "current_year".
        issue_or_month_flag (str | list[str], optional): Flag for issue/month selection.
            Defaults to "current_issue".
        options (dict[str, Any], optional): Additional options. Defaults to {}.
    """
    if options is None:
        options = {}

    path_root, path_output, combine_flag = preparation(
        path_storage, path_output, output_basename, pub_type, issue_or_month_flag, year_flag, options
    )

    if generate_or_combine == "generate_data":
        publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(path_storage, options)
        for publisher in publisher_abbr_dict:
            pp = os.path.join(path_output, publisher.lower())

            publisher_html_body = []
            # Generate output separately for each abbreviation
            for abbr in publisher_abbr_dict[publisher]:
                print(f"*** Processing {publisher.upper()}: {abbr} ***")
                new_options = publisher_abbr_dict[publisher][abbr]

                # Resolve the bibliography path; for a concrete year, narrow it to a
                # single "*_<year>.bib" file found under the abbr directory.
                path_abbr = os.path.join(path_storage, publisher.lower(), abbr)
                if isinstance(year_flag, str) and year_flag.isdigit():
                    for root, _, files in os.walk(path_abbr, topdown=True):
                        files = [f for f in files if f.endswith(".bib")]
                        # FIX: escape the "." so "_2024.bib" cannot accidentally match
                        # file names such as "name_2024xbib" (the unescaped dot
                        # previously matched any character).
                        if files := [f for f in files if re.search(rf"_{year_flag}\.bib", f)]:
                            path_abbr = os.path.join(root, files[0])

                # Generate and process library
                library = generate_library_by_filters(path_abbr, issue_or_month_flag, year_flag, new_options)

                # Generate md, tex, pdf, html for this abbreviation
                html_body = generate_html_from_bib_data(abbr, library, pp, new_options)
                if combine_flag and html_body:
                    publisher_html_body.extend([*html_body, "\n"])

            # Combine all per-abbr bodies into one page per publisher
            # (drop the trailing "\n" separator added after the last body)
            if publisher_html_body:
                html_content = generate_html_content(publisher_html_body[:-1], publisher)
                write_list(html_content, f"{publisher}_all.html", "w", pp, False)

    elif generate_or_combine == "combine_data":
        _combine_data(path_storage, path_root, path_output, combine_flag, options)

    return None
142
+
143
+
144
def _combine_data(path_storage, path_root, path_output, combine_flag, options):
    """Combine per-abbreviation HTML pages into a link page per publisher.

    Args:
        path_storage: Path to storage directory.
        path_root: Root path used to build link targets ("" for relative links).
        path_output: Path to output directory.
        combine_flag: Whether a combined "<publisher>_all.html" page exists to link.
        options: Configuration options (abbr include/exclude lists are reset).
    """
    # The combine step must always see every abbreviation
    options["include_abbr_list"] = []
    options["exclude_abbr_list"] = []

    publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(path_storage, options)
    for publisher, abbr_dict in publisher_abbr_dict.items():
        print(f"*** Combining papers for {publisher.upper()} ***")
        pp = os.path.join(path_output, publisher.lower())
        absolute_path = os.path.join(path_root, publisher) if path_root else ""

        # Collect one link per abbreviation whose HTML page exists
        link = [f"# {publisher.upper()}\n\n"]
        for abbr in abbr_dict:
            if os.path.exists(os.path.join(pp, abbr, f"{abbr}.html")):
                target = os.path.join(absolute_path, abbr, f"{abbr}.html")
                link.append(f"- [{abbr}]({target})\n")

        # Prepend a link to the combined page when one was generated
        if combine_flag:
            target = os.path.join(absolute_path, f"{publisher}_all.html")
            link.insert(1, f"- [All Journals]({target})\n")

        # Only write the page when at least one link was collected
        if len(link) > 1:
            write_list(link, f"{publisher}_link.md", "w", pp, False)
            PandocMdTo({}).pandoc_md_to_html(pp, pp, f"{publisher}_link.md", f"{publisher}_link.html", True)

        # Remove the intermediate markdown file
        md_file = os.path.join(pp, f"{publisher}_link.md")
        if os.path.exists(md_file):
            os.remove(md_file)