pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -1
- pyeasyphd/main/__init__.py +0 -4
- pyeasyphd/main/basic_input.py +7 -63
- pyeasyphd/main/python_run_md.py +3 -3
- pyeasyphd/main/python_run_tex.py +1 -1
- pyeasyphd/pyeasyphd.sublime-settings +2 -160
- pyeasyphd/tools/__init__.py +1 -16
- pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
- pyeasyphd/tools/generate/generate_html.py +122 -0
- pyeasyphd/tools/generate/generate_library.py +188 -0
- pyeasyphd/tools/generate/generate_links.py +13 -4
- pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
- pyeasyphd/tools/search/search_base.py +8 -5
- pyeasyphd/tools/search/search_core.py +4 -3
- pyeasyphd/tools/search/search_keywords.py +1 -1
- pyeasyphd/tools/search/search_writers.py +8 -5
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
- pyeasyphd-0.1.2.dist-info/RECORD +27 -0
- pyeasyphd/bib/__init__.py +0 -1
- pyeasyphd/bib/bibtexbase/__init__.py +0 -7
- pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
- pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
- pyeasyphd/bib/bibtexparser/__init__.py +0 -47
- pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
- pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
- pyeasyphd/bib/bibtexparser/library.py +0 -207
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
- pyeasyphd/bib/bibtexparser/model.py +0 -481
- pyeasyphd/bib/bibtexparser/splitter.py +0 -151
- pyeasyphd/bib/core/__init__.py +0 -18
- pyeasyphd/bib/core/convert_library_to_library.py +0 -31
- pyeasyphd/bib/core/convert_library_to_str.py +0 -199
- pyeasyphd/bib/core/convert_str_to_library.py +0 -34
- pyeasyphd/bib/core/convert_str_to_str.py +0 -27
- pyeasyphd/main/python_run_bib.py +0 -73
- pyeasyphd/main/python_writers.py +0 -212
- pyeasyphd/tools/compare/compare_bibs.py +0 -234
- pyeasyphd/tools/experiments_base.py +0 -203
- pyeasyphd/tools/format_save_bibs.py +0 -178
- pyeasyphd/tools/replace/replace.py +0 -81
- pyeasyphd/tools/spider/process_spider_bib.py +0 -247
- pyeasyphd/tools/spider/process_spider_url.py +0 -75
- pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
- pyeasyphd-0.1.0.dist-info/RECORD +0 -80
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
|
@@ -1,234 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import os
|
|
3
|
-
import re
|
|
4
|
-
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
5
|
-
|
|
6
|
-
from pyadvtools import standard_path, transform_to_data_list
|
|
7
|
-
|
|
8
|
-
from ...bib.bibtexparser import Block, Library
|
|
9
|
-
from ...main import PythonRunBib, PythonWriters
|
|
10
|
-
from ..experiments_base import obtain_local_abbr_paths
|
|
11
|
-
|
|
12
|
-
# Preprint/e-print servers; entries from these are bucketed by server name
# rather than by journal/conference abbreviation throughout this module.
ARXIV_BIORXIV = ["arxiv", "biorxiv", "ssrn"]
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def obtain_local_abbr_paths_for_abbr(options: dict, path_spidered_bibs: str, path_spidering_bibs: str) -> List[str]:
    """Collect local abbreviation directories from the spidered/spidering trees.

    Scans the Journals/Conferences folders of the spidered tree and the
    spider_j/spider_c (optionally spider_j_e early-access) folders of the
    spidering tree, applying the include/exclude filters in ``options``.

    Parameters
    ----------
    options : dict
        Filter options forwarded to ``obtain_local_abbr_paths``; the key
        ``include_early_access`` (default True) controls the spider_j_e scan.
    path_spidered_bibs : str
        Root of the already-spidered bibliography tree.
    path_spidering_bibs : str
        Root of the in-progress spidering tree.

    Returns
    -------
    List[str]
        Abbreviation directory paths, excluding preprint servers.
    """
    path_spidered_bibs = standard_path(path_spidered_bibs)
    path_spidering_bibs = standard_path(path_spidering_bibs)

    search_roots = [
        os.path.join(path_spidered_bibs, "Journals"),
        os.path.join(path_spidered_bibs, "Conferences"),
        os.path.join(path_spidering_bibs, "spider_j"),
        os.path.join(path_spidering_bibs, "spider_c"),
    ]
    if options.get("include_early_access", True):
        search_roots.append(os.path.join(path_spidering_bibs, "spider_j_e"))

    collected: List[str] = []
    for root in search_roots:
        collected.extend(obtain_local_abbr_paths(root, options))

    # Preprint servers are matched by server name elsewhere, so drop them here.
    return [p for p in collected if os.path.basename(p).lower() not in ARXIV_BIORXIV]
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def compare_bibs_with_local(
    original_data: Union[List[str], str],
    path_spidered_bibs: str,
    path_spidering_bibs: str,
    path_output: str,
    options: Dict[str, Any],
) -> None:
    """Compare bibliography entries against locally stored .bib collections.

    Parses ``original_data``, buckets its entries by abbreviation, matches
    them against local spidered/spidering bib files, and writes four result
    files into ``path_output``: in_local_entries.bib, not_in_local_entries.bib,
    duplicate_original_entries.bib, and duplicate_searched_entries.bib.

    Parameters
    ----------
    original_data : Union[List[str], str]
        Raw .bib content lines or a path accepted by ``transform_to_data_list``.
    path_spidered_bibs : str
        Root of the already-spidered bibliography tree.
    path_spidering_bibs : str
        Root of the in-progress spidering tree.
    path_output : str
        Directory the four result files are written to.
    options : Dict[str, Any]
        Parsing/comparison options forwarded to the helpers.

    Returns
    -------
    None
    """
    path_output = standard_path(path_output)

    # Parse the original data into one standardized library and remember the
    # original citation-key order for later block sorting.
    parser = PythonRunBib(dict(options))
    library = parser.parse_to_single_standard_library(transform_to_data_list(original_data, ".bib"))
    original_entry_keys = [entry.key for entry in library.entries]

    # Bucket entries by abbreviation; entries without a usable key pattern
    # cannot be compared against local storage.
    abbr_key_entries_dict, not_in_local_entries = generate_abbr_key_entry_dict(library, options)

    # Match the bucketed entries against the local bib collections.
    (searched_entries, not_searched_entries,
     duplicate_original_entries, duplicate_searched_entries) = _compare_with_local(
        abbr_key_entries_dict, path_spidered_bibs, path_spidering_bibs, options
    )
    not_in_local_entries.extend(not_searched_entries)

    # Matched/unmatched entries: sort blocks to follow the original key order.
    sorted_writer = PythonWriters({
        "is_sort_entry_fields": True,  # default is True
        "is_sort_blocks": True,  # default is True
        "sort_entries_by_cite_keys": original_entry_keys,
    })
    sorted_writer.write_to_file(searched_entries, "in_local_entries.bib", "w", path_output, False)
    sorted_writer.write_to_file(not_in_local_entries, "not_in_local_entries.bib", "w", path_output, False)

    # Duplicate entries keep their discovery order (no block sorting).
    unsorted_writer = PythonWriters({
        "is_sort_entry_fields": True,  # default is True
        "is_sort_blocks": False,  # default is True
    })
    unsorted_writer.write_to_file(duplicate_original_entries, "duplicate_original_entries.bib", "w", path_output, False)
    unsorted_writer.write_to_file(duplicate_searched_entries, "duplicate_searched_entries.bib", "w", path_output, False)
    return None
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def generate_abbr_key_entry_dict(library: Library, options: Dict[str, Any]):
    """Bucket library entries by journal/conference abbreviation.

    Each entry with a non-blank title and year is re-parsed with generated
    cite keys; the generated key's shape decides the bucket:
    ``j_<abbr>_<suffix>`` / ``c_<abbr>_<suffix>`` bucket under ``<abbr>``,
    ``<server>_<suffix>`` (arXiv/bioRxiv/SSRN) bucket under ``<server>``.
    Entries matching neither pattern go to the not-in-local list.

    Parameters
    ----------
    library : Library
        Parsed bibliography whose entries are to be bucketed.
    options : Dict[str, Any]
        Overrides applied on top of the standardization defaults.

    Returns
    -------
    tuple
        ``(abbr -> {original key -> entry}, unmatched entries)``.
    """
    # Standardization defaults; caller-supplied options win on conflict.
    parse_options = {
        "is_standardize_bib": True,  # default is True
        "choose_abbr_zotero_save": "save",  # default is "save"
        "function_common_again": True,  # default is True
        "generate_entry_cite_keys": True,  # default is False
    }
    parse_options.update(options)
    parser = PythonRunBib(parse_options)

    abbr_key_entries_dict: Dict[str, Dict[str, Any]] = {}
    not_in_local_entries = []
    for entry in library.entries:
        matched = False

        # Only entries with both a title and a year can yield a usable key.
        if ("title" in entry) and entry["title"].strip() and ("year" in entry) and entry["year"].strip():
            temp_library = parser.parse_to_single_standard_library(copy.deepcopy(Library([entry])))
            temp_entries = temp_library.entries
            if len(temp_entries) == 1:
                parts = temp_entries[0].key.split("_")

                # article / inproceedings: "j_<abbr>_<suffix>" or "c_<abbr>_<suffix>"
                if (len(parts) == 3) and parts[0].lower() in ["j", "c"]:
                    abbr_key_entries_dict.setdefault(parts[1], {})[entry.key] = entry
                    matched = True

                # misc from a preprint server: "<server>_<suffix>"
                elif (len(parts) == 2) and parts[0].lower() in ARXIV_BIORXIV:
                    abbr_key_entries_dict.setdefault(parts[0], {})[entry.key] = entry
                    matched = True

        if not matched:
            not_in_local_entries.append(entry)
    return abbr_key_entries_dict, not_in_local_entries
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
def _compare_with_local(
    abbr_key_entries_dict: Dict[str, Dict[str, Block]],
    local_path_spidered_bibs: str,
    local_path_spidering_bibs: str,
    options: Dict[str, Any],
) -> Tuple[List[Block], List[Block], List[Block], List[Block]]:
    """Match bucketed entries against the local bib files for each abbreviation.

    For every abbreviation bucket, scan the relevant local .bib files and
    collect, per original entry key, all local entries with an equal title
    (see ``check_equal_for_entry``). Exactly one match -> searched; no match
    -> not searched; several matches -> both sides recorded as duplicates.

    Returns
    -------
    Tuple[List[Block], List[Block], List[Block], List[Block]]
        (searched, not searched, duplicate originals, duplicate matches).
    """
    # compare with local bibs
    searched_entries, not_searched_entries, duplicate_original_entries, duplicate_searched_entries = [], [], [], []
    for abbr, old_key_entries_dict in abbr_key_entries_dict.items():

        # Restrict the local scan to this abbreviation, except for preprint
        # servers, whose directories are filtered out by name instead.
        options_ = {}
        options_.update(options)
        if abbr.lower() not in ARXIV_BIORXIV:
            options_["include_abbr_list"] = [abbr]
        path_abbrs = obtain_local_abbr_paths_for_abbr(options_, local_path_spidered_bibs, local_path_spidering_bibs)

        # original key -> list of matching local entries (deep-copied).
        new_key_entries_dict = {}
        for path_abbr in path_abbrs:
            if len(data_list := transform_to_data_list(path_abbr, ".bib")) == 0:
                continue

            print("*" * 9 + f" Compare in {f'{os.sep}'.join(path_abbr.split(os.sep)[-3:])} for {abbr} " + "*" * 9)

            # Parse the local file with all reprocessing disabled -- local
            # files are assumed already standardized; caller options may
            # still override these defaults.
            _options = {}
            _options["is_standardize_bib"] = False  # default is True
            _options["choose_abbr_zotero_save"] = "save"  # default is "save"
            _options["function_common_again"] = False  # default is True
            _options["function_common_again_abbr"] = False  # default is True
            _options["function_common_again_zotero"] = False  # default is True
            _options["function_common_again_save"] = False  # default is True
            _options["generate_entry_cite_keys"] = False  # default is False
            _options.update(options)
            _library = PythonRunBib(_options).parse_to_single_standard_library(data_list)
            # Collect every local entry whose title matches an original entry.
            for key, entry in old_key_entries_dict.items():
                for _entry in _library.entries:
                    if check_equal_for_entry(entry, _entry, ["title"], abbr):
                        new_key_entries_dict.setdefault(key, []).append(copy.deepcopy(_entry))

            print()

        # Classify each original entry by its number of local matches.
        for key, entry in old_key_entries_dict.items():
            entries = new_key_entries_dict.get(key, [])
            if (length := len(entries)) == 1:
                # Unique match: adopt the original citation key.
                entries[0].key = key
                searched_entries.extend(entries)
            elif length == 0:
                not_searched_entries.append(entry)
            else:
                # Multiple matches: disambiguate keys as key, key-a, key-a-a, ...
                for i, _entry in enumerate(entries):
                    _entry.key = key + "-a" * i
                duplicate_original_entries.append(entry)
                duplicate_searched_entries.extend(entries)

    return searched_entries, not_searched_entries, duplicate_original_entries, duplicate_searched_entries
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def check_equal_for_entry(original_entry, new_entry, compare_field_list: List[str], abbr: Optional[str] = None):
    """Decide whether two bib entries are equal on the given fields.

    Compares the entry types (skipped for preprint-server abbreviations) and
    each field in ``compare_field_list``, case-insensitively and ignoring all
    non-word characters. Titles wrapped in ``\\href{url}{text}`` are reduced
    to their text part before comparison.

    Parameters
    ----------
    original_entry, new_entry
        Entry-like objects supporting ``field in entry`` and ``entry[field]``.
    compare_field_list : List[str]
        Field names to compare.
    abbr : Optional[str]
        Abbreviation context; preprint servers relax the type check.

    Returns
    -------
    bool
        True when all compared components are equal.
    """
    # Preprint servers store everything as misc, so the type is not comparable.
    skip_type = (abbr is not None) and (abbr.lower() in ARXIV_BIORXIV)
    left = [] if skip_type else [original_entry.entry_type.lower()]
    right = [] if skip_type else [new_entry.entry_type.lower()]

    href_pattern = re.compile(r"\\href{(.*)}{(.*)}")

    def _field_value(entry, field: str) -> str:
        raw = entry[field].lower().strip() if field in entry else ""
        # Unwrap hyperlinked titles down to the visible text.
        if field == "title" and (m := href_pattern.search(raw)):
            raw = m.group(2)
        return raw

    for field in compare_field_list:
        left.append(_field_value(original_entry, field))
        right.append(_field_value(new_entry, field))

    # Strip every non-word character so punctuation/spacing differences vanish.
    normalized_left = "_".join(re.sub(r"\W", "", part) for part in left)
    normalized_right = "_".join(re.sub(r"\W", "", part) for part in right)
    if normalized_left == normalized_right:
        return True
    return False
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
def compare_bibs_with_zotero(
    zotero_bib: Union[List[str], str],
    download_bib: Union[List[str], str],
    path_output: str,
    options: Dict[str, Any],
) -> None:
    """Split downloaded entries by whether they already exist in a Zotero export.

    Titles are compared via ``check_equal_for_entry``; results are written to
    ``only_in_download.bib`` and ``in_download_and_zotero.bib`` in
    ``path_output``.

    Parameters
    ----------
    zotero_bib : Union[List[str], str]
        Zotero-exported .bib content or path.
    download_bib : Union[List[str], str]
        Downloaded .bib content or path.
    path_output : str
        Directory the two result files are written to.
    options : Dict[str, Any]
        Parsing options forwarded to ``PythonRunBib`` / ``PythonWriters``.

    Returns
    -------
    None
    """
    path_output = standard_path(path_output)

    def _parse(data, generate_keys: bool):
        # Parse a .bib source into one standardized library; generated cite
        # keys are forced for the download side only.
        opts = dict(options)
        opts["generate_entry_cite_keys"] = generate_keys  # default is False
        return PythonRunBib(opts).parse_to_single_standard_library(transform_to_data_list(data, ".bib"))

    zotero_library = _parse(zotero_bib, False)
    download_library = _parse(download_bib, True)

    # Partition downloaded entries by presence in the Zotero library.
    only_in_download_entries, in_download_and_zotero_entries = [], []
    for download_entry in download_library.entries:
        if any(check_equal_for_entry(zotero_entry, download_entry, ["title"], None)
               for zotero_entry in zotero_library.entries):
            in_download_and_zotero_entries.append(download_entry)
        else:
            only_in_download_entries.append(download_entry)

    writer = PythonWriters(dict(options))
    writer.write_to_file(only_in_download_entries, "only_in_download.bib", "w", path_output, False)
    writer.write_to_file(in_download_and_zotero_entries, "in_download_and_zotero.bib", "w", path_output, False)
    return None
|
|
@@ -1,203 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import os
|
|
3
|
-
from typing import Any, Dict, List
|
|
4
|
-
|
|
5
|
-
from pyadvtools import sort_int_str
|
|
6
|
-
|
|
7
|
-
from ..bib.bibtexparser import Entry
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
def obtain_local_abbr_paths(path_storage: str, options: dict) -> List[str]:
    """Get all local abbreviation paths from the storage directory.

    Walks the publisher/abbreviation hierarchy under ``path_storage`` and
    returns the full path of every abbreviation directory that survives the
    include/exclude filters in ``options``.

    Parameters
    ----------
    path_storage : str
        Root directory containing publisher/abbreviation folders.
    options : dict
        Configuration with include_publisher_list / exclude_publisher_list /
        include_abbr_list / exclude_abbr_list filters.

    Returns
    -------
    List[str]
        Full paths of all matching abbreviation directories; empty when the
        root does not exist.
    """
    if not os.path.exists(path_storage):
        return []

    publisher_abbr_dict = generate_standard_publisher_abbr_options_dict(path_storage, options)
    return [
        os.path.join(path_storage, publisher, abbr)
        for publisher, abbr_dict in publisher_abbr_dict.items()
        for abbr in abbr_dict
    ]
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def generate_standard_publisher_abbr_options_dict(
    path_storage: str, options: Dict[str, Any]
) -> Dict[str, Dict[str, Dict[str, Any]]]:
    """Generate a nested dictionary of publisher/abbreviation options.

    Scans ``path_storage`` for publisher directories and their abbreviation
    subdirectories, applies the include/exclude filters from ``options``, and
    maps each surviving (publisher, abbreviation) pair to a deep copy of
    ``options``.

    Parameters
    ----------
    path_storage : str
        Root directory containing publisher/abbreviation folders.
    options : Dict[str, Any]
        Configuration options including inclusion/exclusion lists.

    Returns
    -------
    Dict[str, Dict[str, Dict[str, Any]]]
        publisher -> abbreviation -> copied options; empty when the root does
        not exist.
    """
    if not os.path.exists(path_storage):
        return {}

    def _subdirs(path: str) -> List[str]:
        # Immediate subdirectory names only.
        return [name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name))]

    # Discover the publisher -> abbreviations layout on disk.
    publisher_abbr_dict: Dict[str, List[str]] = {
        publisher: _subdirs(os.path.join(path_storage, publisher))
        for publisher in _subdirs(path_storage)
    }

    # Filter publishers by the include/exclude lists.
    publisher_list = in_not_in_list(
        list(publisher_abbr_dict.keys()),
        options.get("include_publisher_list", []),
        options.get("exclude_publisher_list", []),
    )

    # Build the nested structure, each leaf holding its own options copy.
    publisher_abbr_options_dict: Dict[str, Dict[str, Dict[str, Any]]] = {}
    for publisher in sort_int_str(publisher_list):
        abbr_list = in_not_in_list(
            publisher_abbr_dict[publisher],
            options.get("include_abbr_list", []),
            options.get("exclude_abbr_list", []),
        )
        for abbr_standard in sort_int_str(abbr_list):
            publisher_abbr_options_dict.setdefault(publisher, {}).setdefault(abbr_standard, copy.deepcopy(options))
    return publisher_abbr_options_dict
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def in_not_in_list(original: List[str], in_list: List[str], out_list: List[str]):
    """Filter a list by case-insensitive include and exclude lists.

    Parameters
    ----------
    original : List[str]
        Items to filter.
    in_list : List[str]
        If non-empty, only items present here (case-insensitively) are kept.
    out_list : List[str]
        Items present here (case-insensitively) are removed.

    Returns
    -------
    List[str]
        Items that pass both filters, in their original order.
    """
    allowed = {item.lower() for item in in_list}
    blocked = {item.lower() for item in out_list}
    # An empty include list means "keep everything".
    if allowed:
        original = [item for item in original if item.lower() in allowed]
    if blocked:
        original = [item for item in original if item.lower() not in blocked]
    return original
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def generate_readme(
    j_conf_abbr: str,
    entry_type: str,
    year_volume_number_month_entry_dict: Dict[str, Dict[str, Dict[str, Dict[str, List[Entry]]]]],
) -> List[str]:
    """Build README markdown lines summarizing bibliography entries.

    Produces a markdown table (Name | Year | Papers | <field>) with one row
    per (year, volume, number, month) bucket, where <field> is journal,
    booktitle, or publisher depending on the entry type.

    Parameters
    ----------
    j_conf_abbr : str
        Journal/conference abbreviation for the title and row names.
    entry_type : str
        Entry type (article, inproceedings, misc); selects the summary field.
    year_volume_number_month_entry_dict : Dict[str, Dict[str, Dict[str, Dict[str, List[Entry]]]]]
        Entries organized by year -> volume -> number -> month.

    Returns
    -------
    List[str]
        Markdown lines, or an empty list when no data rows were produced.
    """
    # Map each supported entry type to the field shown in the last column.
    entry_type = entry_type.lower()
    type_to_field = {"article": "journal", "inproceedings": "booktitle", "misc": "publisher"}
    field_key = type_to_field.get(entry_type, "")

    def _unique_field_values(bucket_entries: List[Entry], key: str) -> List[str]:
        """Collect field values in first-seen order, deduplicated."""
        if not key:
            return []
        values = [e[key] if key in e else "" for e in bucket_entries]
        return list(dict.fromkeys(values))

    lines = [
        f"# {j_conf_abbr}-{entry_type.title()}\n\n",
        f"|Name|Year|Papers|{field_key.title()}|\n",
        "|-|-|-|-|\n",
    ]

    for year, volume_dict in year_volume_number_month_entry_dict.items():
        for volume, number_dict in volume_dict.items():
            for number, month_dict in number_dict.items():
                for month, bucket in month_dict.items():

                    # Build "<abbr>-Vol.<v>-No.<n>-Month<m>", skipping blank
                    # or placeholder ("volume"/"number"/"month") components.
                    segments = []
                    for prefix, value in zip(["", "Vol.", "No.", "Month"], [j_conf_abbr, volume, number, month]):
                        if value.lower().strip() in ["volume", "number", "month"]:
                            continue
                        if value.strip():
                            segments.append(prefix + value)
                    row_name = "-".join(segments)

                    field_values = _unique_field_values(bucket, field_key)
                    lines.append(f'|{row_name}|{year}|{len(bucket)}|{"; ".join(field_values)}|' + "\n")

    # The header is 3 lines; return content only when data rows exist.
    return lines if len(lines) > 3 else []
|
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
import math
|
|
3
|
-
import os
|
|
4
|
-
import re
|
|
5
|
-
from typing import Any, Dict, List, Union
|
|
6
|
-
|
|
7
|
-
from pyadvtools import (
|
|
8
|
-
IterateCombineExtendDict,
|
|
9
|
-
read_list,
|
|
10
|
-
sort_int_str,
|
|
11
|
-
standard_path,
|
|
12
|
-
write_list,
|
|
13
|
-
)
|
|
14
|
-
|
|
15
|
-
from ..bib.bibtexparser import Block, Library
|
|
16
|
-
from ..main import PythonRunBib, PythonWriters
|
|
17
|
-
from ..tools.experiments_base import generate_readme
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
def format_entries_for_abbr_zotero_save(
    j_conf_abbr: str,
    path_output: str,
    original_data: Union[List[str], str, Library],
    combine_year_length: int = 1,
    default_year_list: Union[List[str], None] = None,
    write_flag_bib: str = "w",
    check_bib_exist: bool = False,
    write_flag_readme: str = "w",
    check_md_exist: bool = False,
    options: Union[Dict[str, Any], None] = None,
) -> None:
    """Format bibliography entries and organize them by year and type.

    Processes bibliography data and organizes it into separate files by entry
    type and year, generating both BibTeX files and README documentation.

    Parameters
    ----------
    j_conf_abbr : str
        Journal/conference abbreviation used for naming output files
    path_output : str
        Output directory path for processed files
    original_data : Union[List[str], str, Library]
        Input bibliography data (list of strings, file path, or Library object)
    combine_year_length : int, optional
        Number of years to combine in each output file, by default 1
    default_year_list : Union[List[str], None], optional
        Specific years to process (None/empty processes all years), by default None
    write_flag_bib : str, optional
        Write mode for BibTeX files ("w" write, "a" append), by default "w"
    check_bib_exist : bool, optional
        Whether to check if BibTeX files exist before writing, by default False
    write_flag_readme : str, optional
        Write mode for README files ("w" write, "a" append), by default "w"
    check_md_exist : bool, optional
        Whether to check if README files exist before writing, by default False
    options : Union[Dict[str, Any], None], optional
        Additional processing options, by default None

    Returns
    -------
    None
    """
    # NOTE: the previous signature used mutable defaults ([] and {}); the
    # None sentinels below are behavior-identical (both were only read) and
    # avoid the shared-mutable-default pitfall.
    path_output = standard_path(path_output)

    # Set up processing options; forced sorting overrides caller options.
    _options = {}
    _options.update(options or {})
    _options["is_sort_entry_fields"] = True  # Force field sorting
    _options["is_sort_blocks"] = True  # Force block sorting
    _options["sort_entries_by_field_keys_reverse"] = False  # Sort ascending

    # Initialize helper classes (writer additionally allows empty cite keys).
    _python_bib = PythonRunBib(_options)

    _options["empty_entry_cite_keys"] = True  # Allow empty citation keys
    _python_writer = PythonWriters(_options)

    # Organize entries by type, year, volume, number, and month.
    entry_type_year_volume_number_month_entry_dict = _python_bib.parse_to_nested_entries_dict(original_data)

    # Process each entry type separately.
    for entry_type in entry_type_year_volume_number_month_entry_dict:

        # Filter years if specified.
        year_dict = entry_type_year_volume_number_month_entry_dict[entry_type]
        year_list = sort_int_str(list(year_dict.keys()))
        if default_year_list:
            year_list = [y for y in year_list if y in default_year_list]
        year_dict = {year: year_dict[year] for year in year_list}

        # Save bibliography files grouped by runs of combine_year_length years.
        path_write = os.path.join(path_output, entry_type.lower(), "bib")
        for i in range(math.ceil(len(year_list) / combine_year_length)):

            # Determine the year range covered by this file.
            start_year_index = i * combine_year_length
            end_year_index = min([(i + 1) * combine_year_length, len(year_list)])
            combine_year = year_list[start_year_index:end_year_index]

            # Flatten the entries of these years into one list.
            new_year_dict = {year: year_dict[year] for year in combine_year}
            entries: List[Block] = IterateCombineExtendDict().dict_update(copy.deepcopy(new_year_dict))

            # Filename: <abbr>_<first year>[_<last year>].bib
            name = f"{j_conf_abbr}_{combine_year[0]}"
            if len(combine_year) > 1:
                name += f"_{combine_year[-1]}"
            name += ".bib"

            _python_writer.write_to_file(entries, name, write_flag_bib, path_write, check_bib_exist)

        # Generate and save README documentation for this entry type.
        path_write = os.path.join(path_output, entry_type.lower())
        readme_md = generate_readme(j_conf_abbr, entry_type, year_dict)

        # In append mode, drop the header and any rows already present
        # (whitespace-insensitive comparison) in the existing README.
        if re.search("a", write_flag_readme):
            old_readme_md = [re.sub(r"[ ]+", "", line) for line in read_list("README.md", "r", path_write)]
            readme_md = readme_md[3:] if old_readme_md else readme_md
            readme_md = [line for line in readme_md if re.sub(r"[ ]+", "", line) not in old_readme_md]

        write_list(readme_md, "README.md", write_flag_readme, path_write, check_md_exist)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def generate_statistic_information(path_output: str) -> None:
    """Generate statistical information from bibliography files.

    Walks the directory tree under ``path_output``, parses every ``.bib``
    file, and writes one identifier per entry (DOI preferred, URL as
    fallback, empty string when neither exists) into a ``.csv`` file whose
    path mirrors the ``.bib`` path with ``bib`` replaced by ``url``.

    Parameters
    ----------
    path_output : str
        Root directory containing BibTeX files to process

    Returns
    -------
    None
    """
    # Find all BibTeX files in the directory tree.
    full_files = []
    for root, _, files in os.walk(path_output):
        full_files.extend([os.path.join(root, f) for f in files if f.endswith(".bib")])

    # Configure a minimal parsing pipeline: no standardization, no
    # reprocessing, no sorting -- we only read fields out of each entry.
    _options = {
        "is_standardize_bib": False,  # Skip standardization, default is True
        "choose_abbr_zotero_save": "save",  # Use save format, default is "save"
        "function_common_again": False,  # Skip reprocessing, default is True
        "function_common_again_abbr": False,  # Skip abbreviation reprocessing, default is True
        "function_common_again_zotero": False,  # Skip Zotero reprocessing, default is True
        "function_common_again_save": False,  # Skip save format reprocessing, default is True
        "is_sort_entry_fields": False,  # Skip field sorting
        "is_sort_blocks": False,  # Skip block sorting
    }
    _python_bib = PythonRunBib(_options)

    # Process each BibTeX file.
    for f in full_files:
        informations = []
        library = _python_bib.parse_to_single_standard_library(f)

        # Extract DOI or URL for each entry. (The previous version started
        # with flag = "" followed by a redundant `if not flag:` check that
        # was always true; that dead branch is removed here.)
        for entry in library.entries:
            identifier = entry["doi"] if "doi" in entry else ""
            if not identifier:
                identifier = entry["url"] if "url" in entry else ""
            informations.append(identifier + "\n")

        # Mirror the .bib path into the url/ directory as a .csv file.
        csv_path = f.replace(".bib", ".csv").replace(f"{os.sep}bib{os.sep}", f"{os.sep}url{os.sep}")
        write_list(informations, csv_path, "w", None, False)

    return None
|