bibcite-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bibcite/__init__.py +3 -0
- bibcite/bibfile.py +194 -0
- bibcite/cli.py +272 -0
- bibcite/data/strings.bib +352 -0
- bibcite/normalize.py +86 -0
- bibcite/resolve.py +289 -0
- bibcite/sources.py +593 -0
- bibcite/venues.py +241 -0
- bibcite_cli-0.1.0.dist-info/METADATA +74 -0
- bibcite_cli-0.1.0.dist-info/RECORD +13 -0
- bibcite_cli-0.1.0.dist-info/WHEEL +4 -0
- bibcite_cli-0.1.0.dist-info/entry_points.txt +2 -0
- bibcite_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
bibcite/data/strings.bib
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
%%%%%%%%%%%%%%%%%%%%%% Journals %%%%%%%%%%%%%%%%
|
|
2
|
+
@string{IJCV = "International Journal of Computer Vision (IJCV)"}
|
|
3
|
+
|
|
4
|
+
@string{CVIU = "Computer Vision and Image Understanding (CVIU)"}
|
|
5
|
+
|
|
6
|
+
@string{PR = "Pattern Recognition"}
|
|
7
|
+
|
|
8
|
+
@string{PRL = "Pattern Recognition Letters"}
|
|
9
|
+
|
|
10
|
+
@string{ML = "Machine Learning"}
|
|
11
|
+
|
|
12
|
+
@string{AI = "Artificial Intelligence"}
|
|
13
|
+
|
|
14
|
+
@string{AR = "Autonomous Robots"}
|
|
15
|
+
|
|
16
|
+
@string{MVA = "Machine Vision and Applications"}
|
|
17
|
+
|
|
18
|
+
@string{IVC = "Image and Vision Computing"}
|
|
19
|
+
|
|
20
|
+
@string{BBS = "Behavioral and Brain Sciences (BBS)"}
|
|
21
|
+
|
|
22
|
+
@string{VR = "Vision Research"}
|
|
23
|
+
|
|
24
|
+
@string{IR = "Information Retrieval"}
|
|
25
|
+
|
|
26
|
+
@string{NN = "Neural Networks"}
|
|
27
|
+
|
|
28
|
+
@string{CAG = "Computers \& Graphics"}
|
|
29
|
+
|
|
30
|
+
@string{CVGIP = "Computer Vision, Graphics, and Image Processing (CVGIP)"}
|
|
31
|
+
|
|
32
|
+
@string{CVGIPIU = "CVGIP: Image Understanding"}
|
|
33
|
+
|
|
34
|
+
@string{PP = "Perception \& Psychophysics"}
|
|
35
|
+
|
|
36
|
+
@string{FTCGV = "Foundations and Trends in Computer Graphics and Vision"}
|
|
37
|
+
|
|
38
|
+
@string{AdvRob = "Advanced Robotics"}
|
|
39
|
+
|
|
40
|
+
@string{Nature = "Nature"}
|
|
41
|
+
|
|
42
|
+
@string{Science = "Science"}
|
|
43
|
+
|
|
44
|
+
@string{Mechatronics = "Mechatronics"}
|
|
45
|
+
|
|
46
|
+
@string{NRN = "Nature Reviews Neuroscience"}
|
|
47
|
+
|
|
48
|
+
@string{NM = "Nature Methods"}
|
|
49
|
+
|
|
50
|
+
@string{PHY = "Physical Review E"}
|
|
51
|
+
|
|
52
|
+
@string{PsychRev = "Psychological Review"}
|
|
53
|
+
|
|
54
|
+
@string{JMLR = "Journal of Machine Learning Research (JMLR)"}
|
|
55
|
+
|
|
56
|
+
@string{JSC = "Journal of Scientific Computing"}
|
|
57
|
+
|
|
58
|
+
@string{JCN = "Journal of Cognitive Neuroscience"}
|
|
59
|
+
|
|
60
|
+
@string{JEPHPP = "Journal of Experimental Psychology: Human Perception and Performance"}
|
|
61
|
+
|
|
62
|
+
@string{JECP = "Journal of Experimental Child Psychology"}
|
|
63
|
+
|
|
64
|
+
@string{JB = "Journal of Biomechanics"}
|
|
65
|
+
|
|
66
|
+
@string{EURASIP = "EURASIP Journal on Advances in Signal Processing"}
|
|
67
|
+
|
|
68
|
+
@string{PRESENCE = "Presence: Teleoperators and Virtual Environments"}
|
|
69
|
+
|
|
70
|
+
@string{BMB = "The Bulletin of Mathematical Biophysics"}
|
|
71
|
+
|
|
72
|
+
@string{TVC = "The Visual Computer"}
|
|
73
|
+
|
|
74
|
+
@string{TJSC = "The Journal of Supercomputing"}
|
|
75
|
+
|
|
76
|
+
% IEEE
|
|
77
|
+
@string{PIEEE = "Proceedings of the IEEE"}
|
|
78
|
+
|
|
79
|
+
@string{RAL = "IEEE Robotics and Automation Letters (RA-L)"}
|
|
80
|
+
|
|
81
|
+
@string{CGA = "IEEE Computer Graphics and Applications"}
|
|
82
|
+
|
|
83
|
+
@string{IEEEA = "IEEE Access"}
|
|
84
|
+
|
|
85
|
+
@string{TPAMI = "IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"}
|
|
86
|
+
|
|
87
|
+
@string{PAMI = "IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)"}
|
|
88
|
+
|
|
89
|
+
@string{TC = "IEEE Transactions on Communications"}
|
|
90
|
+
|
|
91
|
+
@string{TCyb = "IEEE Transactions on Cybernetics"}
|
|
92
|
+
|
|
93
|
+
@string{TSE = "IEEE Transactions on Software Engineering"}
|
|
94
|
+
|
|
95
|
+
@string{TIV = "IEEE Transactions on Intelligent Vehicles"}
|
|
96
|
+
|
|
97
|
+
@string{TIP = "IEEE Transactions on Image Processing"}
|
|
98
|
+
|
|
99
|
+
@string{TOR = "IEEE Transactions on Robotics"}
|
|
100
|
+
|
|
101
|
+
@string{TAC = "IEEE Transactions on Automatic Control"}
|
|
102
|
+
|
|
103
|
+
@string{TITS = "IEEE Transactions on Intelligent Transportation Systems (T-ITS)"}
|
|
104
|
+
|
|
105
|
+
@string{TOC = "IEEE Transactions on Computers"}
|
|
106
|
+
|
|
107
|
+
@string{TVT = "IEEE Transactions on Vehicular Technology"}
|
|
108
|
+
|
|
109
|
+
@string{TNN = "IEEE Transactions on Neural Networks"}
|
|
110
|
+
|
|
111
|
+
@string{THMS = "IEEE Transactions on Human-Machine Systems"}
|
|
112
|
+
|
|
113
|
+
@string{TCSVT = "IEEE Transactions on Circuits and Systems for Video Technology"}
|
|
114
|
+
|
|
115
|
+
@string{TBIOM = "IEEE Transactions on Biometrics, Behavior, and Identity Science (T-BIOM)"}
|
|
116
|
+
|
|
117
|
+
@string{TIT = "IEEE Transactions on Information Theory"}
|
|
118
|
+
|
|
119
|
+
@string{TVCG = "IEEE Transactions on Visualization and Computer Graphics (TVCG)"}
|
|
120
|
+
|
|
121
|
+
@string{TSSC = "IEEE Transactions on Systems Science and Cybernetics"}
|
|
122
|
+
|
|
123
|
+
@string{IRETIT= "IRE Transactions on Information Theory"}
|
|
124
|
+
|
|
125
|
+
@string{IJTEHM= "IEEE Journal of Translational Engineering in Health and Medicine"}
|
|
126
|
+
|
|
127
|
+
% ACM
|
|
128
|
+
@string{TOCHI = "ACM Transactions on Computer-Human Interaction (TOCHI)"}
|
|
129
|
+
|
|
130
|
+
@string{TOG = "ACM Transactions on Graphics (TOG)"}
|
|
131
|
+
|
|
132
|
+
@string{CACM = "Communications of the ACM (CACM)"}
|
|
133
|
+
|
|
134
|
+
@string{IMWUT = "Proceedings of the ACM on Interactive, Mobile, Wearable and Ubiquitous Technologies (IMWUT)"}
|
|
135
|
+
|
|
136
|
+
@string{CSUR = "ACM Computing Surveys (CSUR)"}
|
|
137
|
+
|
|
138
|
+
@string{THRI = "ACM Transactions on Human-Robot Interaction"}
|
|
139
|
+
|
|
140
|
+
@string{AnnStat = "Annals of Statistics"}
|
|
141
|
+
|
|
142
|
+
@string{JC = "Journal of Classification"}
|
|
143
|
+
|
|
144
|
+
@string{IJRR = "International Journal of Robotics Research (IJRR)"}
|
|
145
|
+
|
|
146
|
+
@string{RSS = "Robotics: Science and Systems (RSS)"}
|
|
147
|
+
|
|
148
|
+
@string{PLOSOne = "PLOS One"}
|
|
149
|
+
|
|
150
|
+
@string{SMO = "Sports Medicine -- Open"}
|
|
151
|
+
|
|
152
|
+
@string{IJMIR = "International Journal of Multimedia Information Retrieval (IJMIR)"}
|
|
153
|
+
|
|
154
|
+
@string{BiolCyb = "Biological Cybernetics"}
|
|
155
|
+
|
|
156
|
+
@string{Psychomet = "Psychometrika"}
|
|
157
|
+
|
|
158
|
+
@string{Biotelem = "Biotelemetry"}
|
|
159
|
+
|
|
160
|
+
@string{NC = "Neural Computation"}
|
|
161
|
+
|
|
162
|
+
@string{Neurocomputing = "Neurocomputing"}
|
|
163
|
+
|
|
164
|
+
@string{PhilosMag = "London, Edinburgh, and Dublin Philosophical Magazine and Journal of Science"}
|
|
165
|
+
|
|
166
|
+
@string{TST = "Tsinghua Science and Technology"}
|
|
167
|
+
|
|
168
|
+
@string{VRIH = "Virtual Reality \& Intelligent Hardware (VRIH)"}
|
|
169
|
+
|
|
170
|
+
@string{ISPRS = "ISPRS Journal of Photogrammetry and Remote Sensing (P\&RS)"}
|
|
171
|
+
|
|
172
|
+
@string{MMS = "Multimedia Systems"}
|
|
173
|
+
|
|
174
|
+
@string{SSS = "Social Studies of Science"}
|
|
175
|
+
|
|
176
|
+
@string{SIREV = "SIAM Review"}
|
|
177
|
+
|
|
178
|
+
@string{Sensors = "Sensors"}
|
|
179
|
+
|
|
180
|
+
@string{Electronics = "Electronics"}
|
|
181
|
+
|
|
182
|
+
@string{ARVC = "Annual Review of Vision Science"}
|
|
183
|
+
|
|
184
|
+
@string{ARP = "Annual Review of Psychology"}
|
|
185
|
+
|
|
186
|
+
@string{PRSLB = "Proceedings of the Royal Society of London. Series B, Biological Sciences"}
|
|
187
|
+
|
|
188
|
+
@string{PRSA = "Proceedings of the Royal Society A"}
|
|
189
|
+
|
|
190
|
+
@string{TJP = "The Journal of Physiology"}
|
|
191
|
+
|
|
192
|
+
@string{USSRCMMP = "USSR Computational Mathematics and Mathematical Physics"}
|
|
193
|
+
|
|
194
|
+
@string{CRHSAS = "Comptes rendus hebdomadaires des séances de l'Académie des sciences"}
|
|
195
|
+
|
|
196
|
+
%%%%%%%%%%%%%%%%%%%%% Conferences %%%%%%%%%%%%%%
|
|
197
|
+
@string{CVPR = "IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)"}
|
|
198
|
+
|
|
199
|
+
@string{ICCV = "IEEE/CVF International Conference on Computer Vision (ICCV)"}
|
|
200
|
+
|
|
201
|
+
@string{WACV = "IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)"}
|
|
202
|
+
|
|
203
|
+
@string{ECCV = "European Conference on Computer Vision (ECCV)"}
|
|
204
|
+
|
|
205
|
+
@string{ACCV = "Asian Conference on Computer Vision (ACCV)"}
|
|
206
|
+
|
|
207
|
+
@string{BMVC = "British Machine Vision Conference (BMVC)"}
|
|
208
|
+
|
|
209
|
+
@string{DAGM = "DAGM Annual Pattern Recognition Symposium"}
|
|
210
|
+
|
|
211
|
+
@string{GCPR = "DAGM German Conference on Pattern Recognition (GCPR)"}
|
|
212
|
+
|
|
213
|
+
@string{NIPS = "Advances in Neural Information Processing Systems (NIPS)"}
|
|
214
|
+
|
|
215
|
+
@string{NeurIPS = "Advances in Neural Information Processing Systems (NeurIPS)"}
|
|
216
|
+
|
|
217
|
+
@string{NeurIPSDB = "Neural Information Processing Systems: Datasets and Benchmarks Track"}
|
|
218
|
+
|
|
219
|
+
@string{TDV = "International Conference on 3D Vision (3DV)"}
|
|
220
|
+
|
|
221
|
+
@string{ICML = "International Conference on Machine Learning (ICML)"}
|
|
222
|
+
|
|
223
|
+
@string{ICLR = "International Conference on Learning Representations (ICLR)"}
|
|
224
|
+
|
|
225
|
+
@string{ICPR = "International Conference on Pattern Recognition (ICPR)"}
|
|
226
|
+
|
|
227
|
+
@string{CAIP = "International Conference on Analysis of Images and Patterns (CAIP)"}
|
|
228
|
+
|
|
229
|
+
@string{ICIAP = "International Conference on Image Analysis and Processing (ICIAP)"}
|
|
230
|
+
|
|
231
|
+
@string{ICIAR = "International Conference on Image Analysis and Recognition (ICIAR)"}
|
|
232
|
+
|
|
233
|
+
@string{ISCS = "IEEE International Symposium on Circuits and Systems (ISCAS)"}
|
|
234
|
+
|
|
235
|
+
@string{FG = "IEEE International Conference on Automatic Face and Gesture Recognition (FG)"}
|
|
236
|
+
|
|
237
|
+
@string{CDC = "IEEE Conference on Decision and Control (CDC)"}
|
|
238
|
+
|
|
239
|
+
@string{IROS = "IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)"}
|
|
240
|
+
|
|
241
|
+
@string{ICRA = "IEEE International Conference on Robotics and Automation (ICRA)"}
|
|
242
|
+
|
|
243
|
+
@string{IVS = "IEEE Intelligent Vehicles Symposium (IV)"}
|
|
244
|
+
|
|
245
|
+
@string{ICASSP = "IEEE Conference on Acoustics, Speech and Signal Processing (ICASSP)"}
|
|
246
|
+
|
|
247
|
+
@string{ITW = "IEEE Information Theory Workshop (ITW)"}
|
|
248
|
+
|
|
249
|
+
@string{ICIP = "IEEE International Conference on Image Processing (ICIP)"}
|
|
250
|
+
|
|
251
|
+
@string{ICME = "IEEE International Conference on Multimedia \& Expo (ICME)"}
|
|
252
|
+
|
|
253
|
+
@string{CITS = "IEEE Conference on Intelligent Transportation Systems (ITSC)"}
|
|
254
|
+
|
|
255
|
+
@string{SIGGRAPH = "ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH)"}
|
|
256
|
+
|
|
257
|
+
@string{SIGGRAPHAsia = "ACM Transactions on Graphics (Proceedings of ACM SIGGRAPH Asia)"}
|
|
258
|
+
|
|
259
|
+
@string{CHI = "ACM Conference on Human Factors in Computing Systems (CHI)"}
|
|
260
|
+
|
|
261
|
+
@string{MMSys = "ACM Multimedia Systems Conference (MMSys)"}
|
|
262
|
+
|
|
263
|
+
@string{SIGMOD = "ACM SIGMOD International Conference on Management of Data"}
|
|
264
|
+
|
|
265
|
+
@string{MM = "ACM International Conference on Multimedia"}
|
|
266
|
+
|
|
267
|
+
@string{KDD = "ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)"}
|
|
268
|
+
|
|
269
|
+
@string{AAAI = "Conference on Artificial Intelligence (AAAI)"}
|
|
270
|
+
|
|
271
|
+
@string{IJCAI = "International Joint Conference on Artificial Intelligence (IJCAI)"}
|
|
272
|
+
|
|
273
|
+
@string{ACC = "American Control Conference (ACC)"}
|
|
274
|
+
|
|
275
|
+
@string{WAPCV = "International Workshop on Attention in Cognitive Systems (WAPCV)"}
|
|
276
|
+
|
|
277
|
+
@string{COLT92 = "Annual Workshop on Computational Learning Theory (COLT)"}
|
|
278
|
+
|
|
279
|
+
@string{SIBGRAPI = "SIBGRAPI Conference on Graphics, Patterns and Images"}
|
|
280
|
+
|
|
281
|
+
@string{ICIRA = "International Conference on Intelligent Robotics and Applications (ICIRA)"}
|
|
282
|
+
|
|
283
|
+
@string{AISTAT = "International Conference on Artificial Intelligence and Statistics (AISTATS)"}
|
|
284
|
+
|
|
285
|
+
@string{AISTATS = "International Conference on Artificial Intelligence and Statistics (AISTATS)"}
|
|
286
|
+
|
|
287
|
+
@string{SCIA = "Scandinavian Conference on Image Analysis (SCIA)"}
|
|
288
|
+
|
|
289
|
+
@string{EUROCOLT = "European Conference on Computational Learning Theory (EuroCOLT)"}
|
|
290
|
+
|
|
291
|
+
@string{ICVS = "International Conference on Computer Vision Systems (ICVS)"}
|
|
292
|
+
|
|
293
|
+
@string{EMMCVPR = "International Conference on Energy Minimization Methods in Computer Vision and Pattern Recognition (EMMCVPR)"}
|
|
294
|
+
|
|
295
|
+
@string{IJCNN = "International Joint Conference on Neural Networks (IJCNN)"}
|
|
296
|
+
|
|
297
|
+
@string{MICCAI = "International Conference on Medical Image Computing and Computer Assisted Intervention (MICCAI)"}
|
|
298
|
+
|
|
299
|
+
@string{ICANN = "International Conference on Artificial Neural Networks (ICANN)"}
|
|
300
|
+
|
|
301
|
+
@string{ISMIR = "International Society for Music Information Retrieval Conference (ISMIR)"}
|
|
302
|
+
|
|
303
|
+
@string{AMDO = "International Conference on Articulated Motion and Deformable Objects (AMDO)"}
|
|
304
|
+
|
|
305
|
+
@string{Allerton = "Annual Allerton Conference on Communication, Control, and Computing"}
|
|
306
|
+
|
|
307
|
+
@string{OSDI = "USENIX Symposium on Operating Systems Design and Implementation (OSDI)"}
|
|
308
|
+
|
|
309
|
+
@string{BRACIS = "Brazilian Conference on Intelligent Systems (BRACIS)"}
|
|
310
|
+
|
|
311
|
+
@string{MIDL = "Medical Imaging with Deep Learning (MIDL)"}
|
|
312
|
+
|
|
313
|
+
@string{TDBODYTECH = "International Conference and Exhibition on 3D Body Scanning and Processing Technologies (3DBODY.TECH)"}
|
|
314
|
+
|
|
315
|
+
@string{IAS = "International Conference on Intelligent Autonomous Systems"}
|
|
316
|
+
|
|
317
|
+
@string{CoRL = "Conference on Robot Learning"}
|
|
318
|
+
|
|
319
|
+
@string{CRV = "Conference on Computer and Robot Vision"}
|
|
320
|
+
|
|
321
|
+
@string{ICONIP = "International Conference on Neural Information Processing"}
|
|
322
|
+
|
|
323
|
+
@string{SGP = "Symposium on Geometry Processing"}
|
|
324
|
+
|
|
325
|
+
@string{WACV_until_2016 = "IEEE Workshop on Applications of Computer Vision (WACV)"}
|
|
326
|
+
|
|
327
|
+
@string{CPAL = "Conference on Parsimony and Learning (CPAL)"}
|
|
328
|
+
|
|
329
|
+
@string{TMLR = "Transactions on Machine Learning Research (TMLR)"}
|
|
330
|
+
|
|
331
|
+
@string{ACL = "Proceedings of the Annual Meeting of the Association for Computational Linguistics (ACL)"}
|
|
332
|
+
|
|
333
|
+
@string{EMNLP = "Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)"}
|
|
334
|
+
|
|
335
|
+
@string{NAACL = "Proceedings of the Annual Conference of the Nations of the Americas Chapter of the Association for Computational Linguistics (NAACL)"}
|
|
336
|
+
|
|
337
|
+
%%%%%%%%%%%%%%%%%%%%% Workshops %%%%%%%%%%%%%%
|
|
338
|
+
@string{ICCVW = "IEEE International Conference on Computer Vision -- Workshops (ICCVW)"}
|
|
339
|
+
|
|
340
|
+
@string{ECCVW = "European Conference on Computer Vision -- Workshops (ECCVW)"}
|
|
341
|
+
|
|
342
|
+
@string{CVPRW = "IEEE Conference on Computer Vision and Pattern Recognition -- Workshops (CVPRW)"}
|
|
343
|
+
|
|
344
|
+
@string{IROSW = "IEEE/RSJ International Conference on Intelligent Robots and Systems -- Workshops (IROSW)"}
|
|
345
|
+
|
|
346
|
+
@string{WACVW = "IEEE Winter Conference on Applications of Computer Vision -- Workshops (WACVW)"}
|
|
347
|
+
|
|
348
|
+
@string{MICCAIW = "International Conference on Medical Image Computing and Computer Assisted Intervention -- Workshops (MICCAIW)"}
|
|
349
|
+
|
|
350
|
+
@string{MMWVSCC = "ACM Multimedia Conference (MM) -- Workshop on Visual Analysis in Smart and Connected Communities (VSCC)"}
|
|
351
|
+
|
|
352
|
+
@string{ICLRBLOG = "ICLR Blogposts"}
|
bibcite/normalize.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Title/author normalization and citation-key generation.
|
|
2
|
+
|
|
3
|
+
Ported from PaperMemory's miniHash / firstNonStopLowercase logic so that
|
|
4
|
+
matching behaves identically to the battle-tested browser extension.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import re
|
|
8
|
+
import unicodedata
|
|
9
|
+
|
|
10
|
+
# English stop words in the style of NLTK's list (the role PaperMemory's
# englishStopWords plays).  Kept as one whitespace-separated blob so the list
# stays easy to diff; it is split into an immutable set for O(1) membership.
_STOPWORD_TEXT = """i me my myself we our ours ourselves you your yours yourself yourselves he
him his himself she her hers herself it its itself they them their theirs
themselves what which who whom this that these those am is are was were be
been being have has had having do does did doing a an the and but if or
because as until while of at by for with about against between into through
during before after above below to from up down in out on off over under
again further then once here there when where why how all any both each few
more most other some such no nor not only own same so than too very s t can
will just don should now"""
ENGLISH_STOPWORDS = frozenset(_STOPWORD_TEXT.split())
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Letters that have no Unicode decomposition: NFKD leaves them untouched, so
# without an explicit mapping the ASCII encode step would silently delete
# them (e.g. "Søren" -> "Sren").
_ASCII_FALLBACKS = str.maketrans(
    {
        "ß": "ss",
        "ẞ": "SS",
        "æ": "ae",
        "Æ": "AE",
        "œ": "oe",
        "Œ": "OE",
        "ø": "o",
        "Ø": "O",
        "ł": "l",
        "Ł": "L",
        "đ": "d",
        "Đ": "D",
        "ð": "d",
        "Ð": "D",
        "þ": "th",
        "Þ": "Th",
        "ı": "i",
    }
)


def fold_ascii(s: str) -> str:
    """Fold accents/unicode to plain ASCII (é -> e, ø -> o, ß -> ss).

    The string is NFKD-normalized so accented letters split into a base
    letter plus combining marks, a small fallback table maps letters that
    have no decomposition, and whatever is still non-ASCII is dropped.

    Args:
        s: Arbitrary text.

    Returns:
        An ASCII-only string; characters with no ASCII equivalent are
        removed.
    """
    decomposed = unicodedata.normalize("NFKD", s)
    # Translate after normalizing so composed forms such as "ǿ" (ø + acute)
    # are reduced to their base letter first.
    return decomposed.translate(_ASCII_FALLBACKS).encode("ascii", "ignore").decode()
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def mini_hash(s: str, replace: str = "") -> str:
    """PaperMemory's miniHash: lowercase, non-alphanumeric replaced.

    Characters outside ``[a-z0-9_]`` (after lowercasing) are removed, or
    substituted with ``replace`` when it is non-empty.

    When ``replace`` is non-empty, each non-word char maps to one replacement
    so string positions are preserved (needed by the Google Scholar parser);
    this holds when ``replace`` is a single character.  No accent folding is
    done in that mode, since folding can change the string length.

    Args:
        s: Text to hash.
        replace: Substitute for every non-word character; empty means
            "delete" and additionally folds accents to ASCII first.

    Returns:
        The lowercased, filtered string.
    """
    if replace:
        # str.lower() can lengthen a string ("İ" lowercases to two code
        # points), which would shift every later position.  Such characters
        # are kept as-is; being outside [a-z0-9_] they are then replaced
        # like any other non-word character.
        lowered = "".join(
            ch.lower() if len(ch.lower()) == 1 else ch for ch in s
        )
        return re.sub(r"[^a-z0-9_]", replace, lowered)
    return re.sub(r"[^a-z0-9_]", "", fold_ascii(s).lower())
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def norm_title(s: str) -> str:
    """Reduce a title to the canonical form used for same-paper comparison.

    Two titles are treated as the same paper when their canonical forms are
    equal; the canonical form is ``mini_hash`` with its default (deleting)
    behaviour.
    """
    canonical = mini_hash(s)
    return canonical
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def clean_title(s: str) -> str:
    """Return a human-readable title with tidy whitespace and no final period.

    Runs of whitespace (including newlines) collapse to single spaces and
    both ends are trimmed.  A single trailing period is removed (DBLP titles
    end with '.'), but a trailing ellipsis ('...') is left intact.  Braces
    are not modified by this function.

    Args:
        s: Raw title text.

    Returns:
        The cleaned title, without leading or trailing whitespace.
    """
    t = re.sub(r"\s+", " ", s).strip()
    if t.endswith(".") and not t.endswith("..."):
        # Trim again: for "Title ." dropping the period alone would leave
        # a dangling space.
        t = t[:-1].rstrip()
    return t
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def first_significant_word(title: str) -> str:
    """Pick the first non-stop word of ``title`` for use in a citation key.

    Each whitespace-separated word is lowercased and reduced with
    ``mini_hash``; words that reduce to nothing are skipped.  If every
    remaining word is a stop word, the first of them is returned; if no
    word survives at all, the literal "paper" is returned.
    """
    fallback = None
    for raw in title.lower().split():
        token = mini_hash(raw)
        if not token:
            continue
        if token not in ENGLISH_STOPWORDS:
            return token
        if fallback is None:
            # Remember the first usable word in case they are all stop words.
            fallback = token
    return "paper" if fallback is None else fallback
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def first_author_last_name(author_field: str) -> str:
    """Extract the first author's last name from a BibTeX author field.

    Authors are separated on a case-insensitive " and ".  Both the
    "First Last" form (last whitespace-separated token) and the
    "Last, First" form (text before the first comma) are understood.
    The result is passed through ``mini_hash``; "anon" is returned when
    nothing usable remains.
    """
    authors = re.split(r"\s+and\s+", author_field.strip(), flags=re.I)
    # Drop surrounding whitespace, then any braces wrapping the name.
    lead = authors[0].strip().strip("{}")
    if "," in lead:
        surname, _, _ = lead.partition(",")
    else:
        tokens = lead.split()
        surname = tokens[-1] if tokens else "anon"
    return mini_hash(surname) or "anon"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def make_key(author_field: str, year: str | int, title: str) -> str:
    """Build the deterministic citation key ``<lastname><year><firstword>``.

    This follows PaperMemory's scheme (e.g. vaswani2017attention).  It is
    the fallback/default key: when bibtex-tidy runs with --generate-keys,
    the key it generates takes precedence.
    """
    surname = first_author_last_name(author_field)
    keyword = first_significant_word(title)
    return f"{surname}{year}{keyword}"
|