pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic.

Files changed (75)
  1. pyeasyphd/.python-version +1 -1
  2. pyeasyphd/main/__init__.py +0 -4
  3. pyeasyphd/main/basic_input.py +7 -63
  4. pyeasyphd/main/python_run_md.py +3 -3
  5. pyeasyphd/main/python_run_tex.py +1 -1
  6. pyeasyphd/pyeasyphd.sublime-settings +2 -160
  7. pyeasyphd/tools/__init__.py +1 -16
  8. pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
  9. pyeasyphd/tools/generate/generate_html.py +122 -0
  10. pyeasyphd/tools/generate/generate_library.py +188 -0
  11. pyeasyphd/tools/generate/generate_links.py +13 -4
  12. pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
  13. pyeasyphd/tools/search/search_base.py +8 -5
  14. pyeasyphd/tools/search/search_core.py +4 -3
  15. pyeasyphd/tools/search/search_keywords.py +1 -1
  16. pyeasyphd/tools/search/search_writers.py +8 -5
  17. {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
  18. pyeasyphd-0.1.2.dist-info/RECORD +27 -0
  19. pyeasyphd/bib/__init__.py +0 -1
  20. pyeasyphd/bib/bibtexbase/__init__.py +0 -7
  21. pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
  22. pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
  23. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
  24. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
  25. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
  26. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
  27. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
  28. pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
  29. pyeasyphd/bib/bibtexparser/__init__.py +0 -47
  30. pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
  31. pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
  32. pyeasyphd/bib/bibtexparser/library.py +0 -207
  33. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
  34. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
  35. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
  36. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
  37. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
  38. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
  39. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
  40. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
  41. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
  42. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
  43. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
  44. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
  45. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
  46. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
  47. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
  48. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
  49. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
  50. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
  51. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
  52. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
  53. pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
  54. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
  55. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
  56. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
  57. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
  58. pyeasyphd/bib/bibtexparser/model.py +0 -481
  59. pyeasyphd/bib/bibtexparser/splitter.py +0 -151
  60. pyeasyphd/bib/core/__init__.py +0 -18
  61. pyeasyphd/bib/core/convert_library_to_library.py +0 -31
  62. pyeasyphd/bib/core/convert_library_to_str.py +0 -199
  63. pyeasyphd/bib/core/convert_str_to_library.py +0 -34
  64. pyeasyphd/bib/core/convert_str_to_str.py +0 -27
  65. pyeasyphd/main/python_run_bib.py +0 -73
  66. pyeasyphd/main/python_writers.py +0 -212
  67. pyeasyphd/tools/compare/compare_bibs.py +0 -234
  68. pyeasyphd/tools/experiments_base.py +0 -203
  69. pyeasyphd/tools/format_save_bibs.py +0 -178
  70. pyeasyphd/tools/replace/replace.py +0 -81
  71. pyeasyphd/tools/spider/process_spider_bib.py +0 -247
  72. pyeasyphd/tools/spider/process_spider_url.py +0 -75
  73. pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
  74. pyeasyphd-0.1.0.dist-info/RECORD +0 -80
  75. {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
@@ -1,234 +0,0 @@
- import copy
- import os
- import re
- from typing import Any, Dict, List, Optional, Tuple, Union
-
- from pyadvtools import standard_path, transform_to_data_list
-
- from ...bib.bibtexparser import Block, Library
- from ...main import PythonRunBib, PythonWriters
- from ..experiments_base import obtain_local_abbr_paths
-
- ARXIV_BIORXIV = ["arxiv", "biorxiv", "ssrn"]
-
-
- def obtain_local_abbr_paths_for_abbr(options: dict, path_spidered_bibs: str, path_spidering_bibs: str) -> List[str]:
-     path_spidered_bibs = standard_path(path_spidered_bibs)
-     path_spidering_bibs = standard_path(path_spidering_bibs)
-
-     path_abbrs = []
-     path_abbrs.extend(obtain_local_abbr_paths(os.path.join(path_spidered_bibs, "Journals"), options))
-     path_abbrs.extend(obtain_local_abbr_paths(os.path.join(path_spidered_bibs, "Conferences"), options))
-     path_abbrs.extend(obtain_local_abbr_paths(os.path.join(path_spidering_bibs, "spider_j"), options))
-     path_abbrs.extend(obtain_local_abbr_paths(os.path.join(path_spidering_bibs, "spider_c"), options))
-
-     if options.get("include_early_access", True):
-         path_abbrs.extend(obtain_local_abbr_paths(os.path.join(path_spidering_bibs, "spider_j_e"), options))
-
-     path_abbrs = [p for p in path_abbrs if os.path.basename(p).lower() not in ARXIV_BIORXIV]
-     return path_abbrs
-
-
- def compare_bibs_with_local(
-     original_data: Union[List[str], str],
-     path_spidered_bibs: str,
-     path_spidering_bibs: str,
-     path_output: str,
-     options: Dict[str, Any],
- ) -> None:
-     path_output = standard_path(path_output)
-
-     # generate for original data
-     _options = {}
-     _options.update(options)
-     _python_bib = PythonRunBib(_options)
-     data_list = transform_to_data_list(original_data, ".bib")
-     library = _python_bib.parse_to_single_standard_library(data_list)
-     original_entry_keys = [entry.key for entry in library.entries]
-
-     # generate dict for abbr key entry
-     abbr_key_entries_dict, not_in_local_entries = generate_abbr_key_entry_dict(library, options)
-
-     # compare with local bibs
-     tuple_entries = _compare_with_local(abbr_key_entries_dict, path_spidered_bibs, path_spidering_bibs, options)
-     searched_entries, not_searched_entries, duplicate_original_entries, duplicate_searched_entries = tuple_entries
-     not_in_local_entries.extend(not_searched_entries)
-
-     # write with sorting blocks according to original cite keys
-     _options = {}
-     _options["is_sort_entry_fields"] = True # default is True
-     _options["is_sort_blocks"] = True # default is True
-     _options["sort_entries_by_cite_keys"] = original_entry_keys
-     _python_write = PythonWriters(_options)
-     _python_write.write_to_file(searched_entries, "in_local_entries.bib", "w", path_output, False)
-     _python_write.write_to_file(not_in_local_entries, "not_in_local_entries.bib", "w", path_output, False)
-
-     # write without sorting blocks
-     _options = {}
-     _options["is_sort_entry_fields"] = True # default is True
-     _options["is_sort_blocks"] = False # default is True
-     _python_write = PythonWriters(_options)
-     _python_write.write_to_file(duplicate_original_entries, "duplicate_original_entries.bib", "w", path_output, False)
-     _python_write.write_to_file(duplicate_searched_entries, "duplicate_searched_entries.bib", "w", path_output, False)
-     return None
-
-
- def generate_abbr_key_entry_dict(library: Library, options: Dict[str, Any]):
-     _options = {}
-     _options["is_standardize_bib"] = True # default is True
-     _options["choose_abbr_zotero_save"] = "save" # default is "save"
-     _options["function_common_again"] = True # default is True
-     _options["generate_entry_cite_keys"] = True # default is False
-     _options.update(options)
-     _python_bib = PythonRunBib(_options)
-
-     abbr_key_entries_dict, not_in_local_entries = {}, []
-     for entry in library.entries:
-         flag = False
-
-         if ("title" in entry) and (entry["title"].strip()) and ("year" in entry) and (entry["year"].strip()):
-             temp_library = _python_bib.parse_to_single_standard_library(copy.deepcopy(Library([entry])))
-             if len(entries := temp_library.entries) == 1:
-
-                 # article and inproceedings
-                 temps = entries[0].key.split("_")
-                 if (len(temps) == 3) and temps[0].lower() in ["j", "c"]:
-                     abbr_key_entries_dict.setdefault(temps[1], {}).update({entry.key: entry})
-                     flag = True
-
-                 # misc (arXiv, bioRxiv, and ssrn)
-                 elif (len(temps) == 2) and temps[0].lower() in ARXIV_BIORXIV:
-                     abbr_key_entries_dict.setdefault(temps[0], {}).update({entry.key: entry})
-                     flag = True
-
-         if not flag:
-             not_in_local_entries.append(entry)
-     return abbr_key_entries_dict, not_in_local_entries
-
-
- def _compare_with_local(
-     abbr_key_entries_dict: Dict[str, Dict[str, Block]],
-     local_path_spidered_bibs: str,
-     local_path_spidering_bibs: str,
-     options: Dict[str, Any],
- ) -> Tuple[List[Block], List[Block], List[Block], List[Block]]:
-     # compare with local bibs
-     searched_entries, not_searched_entries, duplicate_original_entries, duplicate_searched_entries = [], [], [], []
-     for abbr, old_key_entries_dict in abbr_key_entries_dict.items():
-
-         options_ = {}
-         options_.update(options)
-         if abbr.lower() not in ARXIV_BIORXIV:
-             options_["include_abbr_list"] = [abbr]
-         path_abbrs = obtain_local_abbr_paths_for_abbr(options_, local_path_spidered_bibs, local_path_spidering_bibs)
-
-         new_key_entries_dict = {}
-         for path_abbr in path_abbrs:
-             if len(data_list := transform_to_data_list(path_abbr, ".bib")) == 0:
-                 continue
-
-             print("*" * 9 + f" Compare in {f'{os.sep}'.join(path_abbr.split(os.sep)[-3:])} for {abbr} " + "*" * 9)
-
-             _options = {}
-             _options["is_standardize_bib"] = False # default is True
-             _options["choose_abbr_zotero_save"] = "save" # default is "save"
-             _options["function_common_again"] = False # default is True
-             _options["function_common_again_abbr"] = False # default is True
-             _options["function_common_again_zotero"] = False # default is True
-             _options["function_common_again_save"] = False # default is True
-             _options["generate_entry_cite_keys"] = False # default is False
-             _options.update(options)
-             _library = PythonRunBib(_options).parse_to_single_standard_library(data_list)
-             for key, entry in old_key_entries_dict.items():
-                 for _entry in _library.entries:
-                     if check_equal_for_entry(entry, _entry, ["title"], abbr):
-                         new_key_entries_dict.setdefault(key, []).append(copy.deepcopy(_entry))
-
-         print()
-
-         for key, entry in old_key_entries_dict.items():
-             entries = new_key_entries_dict.get(key, [])
-             if (length := len(entries)) == 1:
-                 entries[0].key = key
-                 searched_entries.extend(entries)
-             elif length == 0:
-                 not_searched_entries.append(entry)
-             else:
-                 for i, _entry in enumerate(entries):
-                     _entry.key = key + "-a" * i
-                 duplicate_original_entries.append(entry)
-                 duplicate_searched_entries.extend(entries)
-
-     return searched_entries, not_searched_entries, duplicate_original_entries, duplicate_searched_entries
-
-
- def check_equal_for_entry(original_entry, new_entry, compare_field_list: List[str], abbr: Optional[str] = None):
-     a_list, b_list = [original_entry.entry_type.lower()], [new_entry.entry_type.lower()]
-     if (abbr is not None) and (abbr.lower() in ARXIV_BIORXIV):
-         a_list, b_list = [], []
-
-     regex_title = re.compile(r"\\href{(.*)}{(.*)}")
-     for field in compare_field_list:
-         x = original_entry[field].lower().strip() if field in original_entry else ""
-         y = new_entry[field].lower().strip() if field in new_entry else ""
-
-         if field == "title":
-             if mch := regex_title.search(x):
-                 x = mch.group(2)
-             if mch := regex_title.search(y):
-                 y = mch.group(2)
-
-         a_list.append(x)
-         b_list.append(y)
-
-     a_list = [re.sub(r"\W", "", a) for a in a_list]
-     b_list = [re.sub(r"\W", "", b) for b in b_list]
-     if "_".join(a_list) == "_".join(b_list):
-         return True
-     return False
-
-
- def compare_bibs_with_zotero(
-     zotero_bib: Union[List[str], str],
-     download_bib: Union[List[str], str],
-     path_output: str,
-     options: Dict[str, Any],
- ) -> None:
-     path_output = standard_path(path_output)
-
-     # for zotero bib
-     _options = {}
-     _options.update(options)
-     _options["generate_entry_cite_keys"] = False # default is False
-     _python_bib = PythonRunBib(_options)
-     data_list = transform_to_data_list(zotero_bib, ".bib")
-     zotero_library = _python_bib.parse_to_single_standard_library(data_list)
-
-     # for download bib
-     _options = {}
-     _options.update(options)
-     _options["generate_entry_cite_keys"] = True # default is False
-     _python_bib = PythonRunBib(_options)
-     data_list = transform_to_data_list(download_bib, ".bib")
-     download_library = _python_bib.parse_to_single_standard_library(data_list)
-
-     # compare download bib and zotero bib
-     only_in_download_entries, in_download_and_zotero_entries = [], []
-     for download_entry in download_library.entries:
-         flag = False
-         for zotero_entry in zotero_library.entries:
-             if check_equal_for_entry(zotero_entry, download_entry, ["title"], None):
-                 in_download_and_zotero_entries.append(download_entry)
-                 flag = True
-                 break
-
-         if not flag:
-             only_in_download_entries.append(download_entry)
-
-     # write
-     _options = {}
-     _options.update(options)
-     _python_write = PythonWriters(_options)
-     _python_write.write_to_file(only_in_download_entries, "only_in_download.bib", "w", path_output, False)
-     _python_write.write_to_file(in_download_and_zotero_entries, "in_download_and_zotero.bib", "w", path_output, False)
-     return None
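
For orientation, the two public helpers removed in this hunk, compare_bibs_with_local and compare_bibs_with_zotero, took a bibliography source plus output paths and an options dict. The sketch below is hypothetical usage inferred only from the signatures above; the file and directory names are placeholders, not values shipped with the package.

    from pyeasyphd.tools.compare.compare_bibs import (
        compare_bibs_with_local,
        compare_bibs_with_zotero,
    )

    options = {}  # extra PythonRunBib/PythonWriters options, if any

    # Compare a reference list against locally spidered bibs; per the code above this
    # writes in_local_entries.bib, not_in_local_entries.bib and the duplicate reports.
    compare_bibs_with_local(
        "my_references.bib",     # original_data: .bib path or list of lines (placeholder)
        "/data/spidered_bibs",   # path_spidered_bibs (contains Journals/, Conferences/)
        "/data/spidering_bibs",  # path_spidering_bibs (contains spider_j/, spider_c/, ...)
        "/data/compare_output",  # path_output (placeholder)
        options,
    )

    # Compare a fresh download against a Zotero export; writes only_in_download.bib
    # and in_download_and_zotero.bib into the output folder.
    compare_bibs_with_zotero("zotero.bib", "download.bib", "/data/compare_output", options)
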
@@ -1,203 +0,0 @@
- import copy
- import os
- from typing import Any, Dict, List
-
- from pyadvtools import sort_int_str
-
- from ..bib.bibtexparser import Entry
-
-
- def obtain_local_abbr_paths(path_storage: str, options: dict) -> List[str]:
-     """Get all local abbreviation paths from the storage directory.
-
-     Scans the specified storage directory and returns paths to all abbreviation
-     subdirectories that match the inclusion/exclusion criteria in options.
-
-     Parameters
-     ----------
-     path_storage : str
-         Root directory containing publisher/abbreviation folders
-     options : dict
-         Configuration dictionary containing:
-         - include_publisher_list: List of publishers to include
-         - exclude_publisher_list: List of publishers to exclude
-         - include_abbr_list: List of abbreviations to include
-         - exclude_abbr_list: List of abbreviations to exclude
-
-     Returns
-     -------
-     List[str]
-         List of full paths to all matching abbreviation directories
-     """
-     abbr_list = []
-     if not os.path.exists(path_storage):
-         return []
-
-     publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(path_storage, options)
-     for publisher in publisher_abbr_dict:
-         for abbr in publisher_abbr_dict[publisher]:
-             abbr_list.append(os.path.join(path_storage, publisher, abbr))
-     return abbr_list
-
-
- def generate_standard_publisher_abbr_options_dict(
-     path_storage: str, options: Dict[str, Any]
- ) -> Dict[str, Dict[str, Dict[str, Any]]]:
-     """Generate a nested dictionary of publisher/abbreviation options.
-
-     Creates a hierarchical dictionary structure representing all publishers
-     and their abbreviations that match the inclusion/exclusion criteria.
-
-     Parameters
-     ----------
-     path_storage : str
-         Root directory containing publisher/abbreviation folders
-     options : Dict[str, Any]
-         Configuration options including inclusion/exclusion lists
-
-     Returns
-     -------
-     Dict[str, Dict[str, Dict[str, Any]]]
-         Nested dictionary structure:
-         - Top level: Publisher names
-         - Middle level: Abbreviation names
-         - Inner level: Copy of options dictionary
-     """
-     if not os.path.exists(path_storage):
-         return {}
-
-     # First scan directory structure to find all publishers and abbreviations
-     publisher_abbr_dict: Dict[str, List[str]] = {}
-     publishers = [f for f in os.listdir(path_storage) if os.path.isdir(os.path.join(path_storage, f))]
-     for p in publishers:
-         path_p = os.path.join(path_storage, p)
-         publisher_abbr_dict.update({p: [f for f in os.listdir(path_p) if os.path.isdir(os.path.join(path_p, f))]})
-
-     # Apply inclusion/exclusion filters to publishers
-     publisher_list = in_not_in_list(
-         list(publisher_abbr_dict.keys()),
-         options.get("include_publisher_list", []),
-         options.get("exclude_publisher_list", []),
-     )
-
-     # Build the nested options dictionary structure
-     publisher_abbr_options_dict: Dict[str, Dict[str, Dict[str, Any]]] = {}
-     for publisher in sort_int_str(publisher_list):
-
-         # Apply inclusion/exclusion filters to abbreviations
-         abbr_list = in_not_in_list(
-             publisher_abbr_dict[publisher],
-             options.get("include_abbr_list", []),
-             options.get("exclude_abbr_list", []),
-         )
-
-         # Create nested structure with copied options
-         for abbr_standard in sort_int_str(abbr_list):
-             publisher_abbr_options_dict.setdefault(publisher, {}).setdefault(abbr_standard, copy.deepcopy(options))
-     return publisher_abbr_options_dict
-
-
- def in_not_in_list(original: List[str], in_list: List[str], out_list: List[str]):
-     """Filter a list based on inclusion and exclusion criteria.
-
-     Parameters
-     ----------
-     original : List[str]
-         Original list to filter
-     in_list : List[str]
-         List of items to include (case-insensitive)
-     out_list : List[str]
-         List of items to exclude (case-insensitive)
-
-     Returns
-     -------
-     List[str]
-         Filtered list containing only items that:
-         - Are in in_list (if in_list is not empty)
-         - Are not in out_list
-     """
-     if in_list := [o.lower() for o in in_list]:
-         original = [o for o in original if o.lower() in in_list]
-     if out_list := [o.lower() for o in out_list]:
-         original = [o for o in original if o.lower() not in out_list]
-     return original
-
-
- def generate_readme(
-     j_conf_abbr: str,
-     entry_type: str,
-     year_volume_number_month_entry_dict: Dict[str, Dict[str, Dict[str, Dict[str, List[Entry]]]]],
- ) -> List[str]:
-     """Generate a README markdown file summarizing bibliography entries.
-
-     Creates a formatted markdown table showing publication statistics
-     organized by year, volume, number, and month.
-
-     Parameters
-     ----------
-     j_conf_abbr : str
-         Journal/conference abbreviation for the title
-     entry_type : str
-         Type of bibliography entries (article, inproceedings, etc.)
-     year_volume_number_month_entry_dict : Dict[str, Dict[str, Dict[str, Dict[str, List[Entry]]]]]
-         Nested dictionary structure containing entries organized by:
-         - Year
-         - Volume
-         - Number
-         - Month
-
-     Returns
-     -------
-     List[str]
-         Lines of the generated markdown file, or empty list if no valid entries
-     """
-     # Configuration for different entry types
-     entry_type_list = ["article", "inproceedings", "misc"]
-     filed_key_list = ["journal", "booktitle", "publisher"]
-
-     # Determine which field to display based on entry type
-     field_key = ""
-     if (entry_type := entry_type.lower()) in entry_type_list:
-         field_key = filed_key_list[entry_type_list.index(entry_type)]
-
-     def extract_journal_booktitle(entries: List[Entry], field_key: str) -> List[str]:
-         """Extract unique journal/booktitle values from entries."""
-         if field_key:
-             contents = []
-             for entry in entries:
-                 value = entry[field_key] if field_key in entry else ""
-                 contents.append(value)
-             return sorted(set(contents), key=contents.index)
-         return []
-
-     # Initialize markdown content with header
-     readme = [
-         f"# {j_conf_abbr}-{entry_type.title()}\n\n",
-         f"|Name|Year|Papers|{field_key.title()}|\n",
-         "|-|-|-|-|\n"
-     ]
-
-     # Process each hierarchical level to build the table
-     for year in year_volume_number_month_entry_dict:
-         for volume in year_volume_number_month_entry_dict[year]:
-             for number in year_volume_number_month_entry_dict[year][volume]:
-                 for month in year_volume_number_month_entry_dict[year][volume][number]:
-
-                     # Generate filename components
-                     file_name = ""
-                     for i, j in zip(["", "Vol.", "No.", "Month"], [j_conf_abbr, volume, number, month]):
-                         if j.lower().strip() in ["volume", "number", "month"]:
-                             j = ""
-                         file_name += (i + j + "-") * (len(j.strip()) >= 1)
-
-                     # Count papers and get journal/booktitle info
-                     number_paper = len(temp := year_volume_number_month_entry_dict[year][volume][number][month])
-                     j_b = extract_journal_booktitle(temp, field_key)
-
-                     # Add table row
-                     readme.append(f'|{file_name[:-1]}|{year}|{number_paper}|{"; ".join(j_b)}|' + "\n")
-
-     # Only return content if we have more than just the header
-     if len(readme) > 3:
-         return readme
-     return []
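
As a reference point for this hunk, the removed experiments_base helpers were small path and filter utilities. The following is a hypothetical sketch based only on the code above, with made-up directory names and filter lists.

    from pyeasyphd.tools.experiments_base import in_not_in_list, obtain_local_abbr_paths

    # Collect abbreviation folders under a storage root, keeping selected
    # publishers and dropping selected abbreviations (all names are examples).
    options = {
        "include_publisher_list": ["IEEE", "Elsevier"],
        "exclude_publisher_list": [],
        "include_abbr_list": [],
        "exclude_abbr_list": ["arxiv"],
    }
    paths = obtain_local_abbr_paths("/data/spidered_bibs/Journals", options)

    # The underlying filter is case-insensitive on both lists.
    kept = in_not_in_list(["TPAMI", "TIP", "arXiv"], [], ["arxiv"])  # -> ["TPAMI", "TIP"]
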
@@ -1,178 +0,0 @@
- import copy
- import math
- import os
- import re
- from typing import Any, Dict, List, Union
-
- from pyadvtools import (
-     IterateCombineExtendDict,
-     read_list,
-     sort_int_str,
-     standard_path,
-     write_list,
- )
-
- from ..bib.bibtexparser import Block, Library
- from ..main import PythonRunBib, PythonWriters
- from ..tools.experiments_base import generate_readme
-
-
- def format_entries_for_abbr_zotero_save(
-     j_conf_abbr: str,
-     path_output: str,
-     original_data: Union[List[str], str, Library],
-     combine_year_length: int = 1,
-     default_year_list: List[str] = [],
-     write_flag_bib: str = "w",
-     check_bib_exist: bool = False,
-     write_flag_readme: str = "w",
-     check_md_exist: bool = False,
-     options: Dict[str, Any] = {},
- ) -> None:
-     """Format bibliography entries and organize them by year and type.
-
-     Processes bibliography data and organizes it into separate files by entry type and year,
-     generating both BibTeX files and README documentation.
-
-     Parameters
-     ----------
-     j_conf_abbr : str
-         Journal/conference abbreviation used for naming output files
-     path_output : str
-         Output directory path for processed files
-     original_data : Union[List[str], str, Library]
-         Input bibliography data in various formats (list of strings, file path, or Library object)
-     combine_year_length : int, optional
-         Number of years to combine in each output file, by default 1
-     default_year_list : List[str], optional
-         Specific years to process (if empty, processes all years), by default []
-     write_flag_bib : str, optional
-         Write mode for BibTeX files ("w" for write, "a" for append), by default "w"
-     check_bib_exist : bool, optional
-         Whether to check if BibTeX files exist before writing, by default False
-     write_flag_readme : str, optional
-         Write mode for README files ("w" for write, "a" for append), by default "w"
-     check_md_exist : bool, optional
-         Whether to check if README files exist before writing, by default False
-     options : Dict[str, Any], optional
-         Additional processing options, by default {}
-
-     Returns
-     -------
-     None
-     """
-     path_output = standard_path(path_output)
-
-     # Set up processing options
-     _options = {}
-     _options.update(options)
-     _options["is_sort_entry_fields"] = True # Force field sorting
-     _options["is_sort_blocks"] = True # Force block sorting
-     _options["sort_entries_by_field_keys_reverse"] = False # Sort in ascending order
-
-     # Initialize helper classes
-     _python_bib = PythonRunBib(_options)
-
-     _options["empty_entry_cite_keys"] = True # Allow empty citation keys
-     _python_writer = PythonWriters(_options)
-
-     # Organize entries by type, year, volume, number, and month
-     entry_type_year_volume_number_month_entry_dict = _python_bib.parse_to_nested_entries_dict(original_data)
-
-     # Process each entry type separately
-     for entry_type in entry_type_year_volume_number_month_entry_dict:
-
-         # Filter years if specified
-         year_dict = entry_type_year_volume_number_month_entry_dict[entry_type]
-         year_list = sort_int_str(list(year_dict.keys()))
-         if default_year_list:
-             year_list = [y for y in year_list if y in default_year_list]
-         year_dict = {year: year_dict[year] for year in year_list}
-
-         # Save bibliography files grouped by years
-         path_write = os.path.join(path_output, entry_type.lower(), "bib")
-         for i in range(math.ceil(len(year_list) / combine_year_length)):
-
-             # Determine year range for this file
-             start_year_index = i * combine_year_length
-             end_year_index = min([(i + 1) * combine_year_length, len(year_list)])
-             combine_year = year_list[start_year_index:end_year_index]
-
-             # Create subset dictionary for these years
-             new_year_dict = {year: year_dict[year] for year in combine_year}
-             entries: List[Block] = IterateCombineExtendDict().dict_update(copy.deepcopy(new_year_dict))
-
-             # Generate filename based on year range
-             name = f"{j_conf_abbr}_{combine_year[0]}"
-             if len(combine_year) > 1:
-                 name += f"_{combine_year[-1]}"
-             name += ".bib"
-
-             # Write the bibliography file
-             _python_writer.write_to_file(entries, name, write_flag_bib, path_write, check_bib_exist)
-
-         # Generate and save README documentation
-         path_write = os.path.join(path_output, entry_type.lower())
-         readme_md = generate_readme(j_conf_abbr, entry_type, year_dict)
-
-         # Handle append mode for README
-         if re.search("a", write_flag_readme):
-             old_readme_md = [re.sub(r"[ ]+", "", line) for line in read_list("README.md", "r", path_write)]
-             readme_md = readme_md[3:] if old_readme_md else readme_md
-             readme_md = [line for line in readme_md if re.sub(r"[ ]+", "", line) not in old_readme_md]
-
-         write_list(readme_md, "README.md", write_flag_readme, path_write, check_md_exist)
-
-
- def generate_statistic_information(path_output: str) -> None:
-     """Generate statistical information from bibliography files.
-
-     Processes all BibTeX files in the directory tree and extracts key information
-     (DOIs and URLs) into CSV files for analysis.
-
-     Parameters
-     ----------
-     path_output : str
-         Root directory containing BibTeX files to process
-
-     Returns
-     -------
-     None
-     """
-     # Find all BibTeX files in the directory tree
-     full_files = []
-     for root, _, files in os.walk(path_output):
-         full_files.extend([os.path.join(root, f) for f in files if f.endswith(".bib")])
-
-     # Configure processing options
-     _options = {
-         "is_standardize_bib": False, # Skip standardization, default is True
-         "choose_abbr_zotero_save": "save", # Use save format, default is "save"
-         "function_common_again": False, # Skip reprocessing, default is True
-         "function_common_again_abbr": False, # Skip abbreviation reprocessing, default is True
-         "function_common_again_zotero": False, # Skip Zotero reprocessing, default is True
-         "function_common_again_save": False, # Skip save format reprocessing, default is True
-         "is_sort_entry_fields": False, # Skip field sorting
-         "is_sort_blocks": False, # Skip block sorting
-     }
-     _python_bib = PythonRunBib(_options)
-
-     # Process each BibTeX file
-     for f in full_files:
-         informations = []
-         library = _python_bib.parse_to_single_standard_library(f)
-
-         # Extract DOI or URL for each entry
-         for entry in library.entries:
-             flag = ""
-             if not flag:
-                 flag = entry["doi"] if "doi" in entry else ""
-             if not flag:
-                 flag = entry["url"] if "url" in entry else ""
-             informations.append(flag + "\n")
-
-         # Write information to CSV file
-         csv_path = f.replace(".bib", ".csv").replace(f"{os.sep}bib{os.sep}", f"{os.sep}url{os.sep}")
-         write_list(informations, csv_path, "w", None, False)
-
-     return None
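
Finally, the removed format_save_bibs entry points split a bibliography into per-year .bib files plus a README table, and dumped DOI/URL columns to CSV. A hypothetical call, reconstructed only from the signatures and docstrings above, might look like the following; the abbreviation, paths, and year grouping are illustrative only.

    from pyeasyphd.tools.format_save_bibs import (
        format_entries_for_abbr_zotero_save,
        generate_statistic_information,
    )

    # Group a journal's entries two years per .bib file and build README tables.
    format_entries_for_abbr_zotero_save(
        "TPAMI",                  # j_conf_abbr (placeholder)
        "/data/formatted_bibs",   # path_output (placeholder)
        "tpami_all.bib",          # original_data: .bib path, lines, or Library
        combine_year_length=2,
    )

    # Walk the output tree and write one DOI/URL .csv per .bib
    # (mirrored under a url/ folder in place of bib/).
    generate_statistic_information("/data/formatted_bibs")
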