gismap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/sources/dblp.py ADDED
@@ -0,0 +1,162 @@
1
+ from typing import ClassVar
2
+ from dataclasses import dataclass, field
3
+ from urllib.parse import quote_plus
4
+ from bs4 import BeautifulSoup as Soup
5
+ from time import sleep
6
+
7
+ from gismap.sources.models import DB, Author, Publication
8
+ from gismap.utils.text import clean_aliases
9
+ from gismap.utils.requests import get
10
+
11
+
12
@dataclass(repr=False)
class DBLP(DB):
    """
    Entry point to the DBLP bibliography (https://dblp.org).

    The class attributes hold the database name and the pauses (in seconds)
    used to stay below the DBLP rate limit.
    """

    db_name: ClassVar[str] = "dblp"
    author_backoff: ClassVar[float] = 7.0
    # NOTE(review): publi_backoff is not used by the methods of this class;
    # from_author pauses for author_backoff. Confirm which one is intended.
    publi_backoff: ClassVar[float] = 2.0

    @classmethod
    def search_author(cls, name, wait=True):
        """
        Look for DBLP author entries matching a name.

        Parameters
        ----------
        name: :class:`str`
            Name of the person to look for.
        wait: :class:`bool`
            Pause for `author_backoff` seconds after the request to avoid
            HTTP 429 answers.

        Returns
        -------
        :class:`list`
            One :class:`DBLPAuthor` per hit; all of them carry the searched `name`.

        Examples
        --------

        >>> fabien = DBLP.search_author("Fabien Mathieu")
        >>> fabien
        [DBLPAuthor(name='Fabien Mathieu', key='66/2077')]
        >>> fabien[0].url
        'https://dblp.org/pid/66/2077.html'
        >>> manu = DBLP.search_author("Manuel Barragan")
        >>> manu # doctest: +NORMALIZE_WHITESPACE
        [DBLPAuthor(name='Manuel Barragan', key='07/10587'),
        DBLPAuthor(name='Manuel Barragan', key='83/3865'),
        DBLPAuthor(name='Manuel Barragan', key='188/0198')]
        >>> DBLP.search_author("NotaSearcherName", wait=False)
        []
        """
        payload = get("https://dblp.org/search/author/api", params={"q": name})
        tree = Soup(payload, features="xml")
        if wait:
            sleep(cls.author_backoff)
        candidates = []
        for hit in tree("hit"):
            # The DBLP key is whatever follows "pid/" in the hit URL.
            key = hit.url.text.split("pid/")[1]
            known_names = [hit.author.text] + [alias.text for alias in hit("alias")]
            candidates.append(
                DBLPAuthor(
                    name=name,
                    key=key,
                    aliases=clean_aliases(name, known_names),
                )
            )
        return candidates

    @classmethod
    def from_author(cls, a, wait=True):
        """
        Fetch the publications of a DBLP author.

        Parameters
        ----------
        a: :class:`DBLPAuthor`
            Author whose `key` identifies the DBLP person page.
        wait: :class:`bool`
            Pause for `author_backoff` seconds after the request to avoid
            HTTP 429 answers.

        Returns
        -------
        :class:`list`
            Papers available in DBLP (records without authors are dropped).

        Examples
        --------

        >>> fabien = DBLPAuthor('Fabien Mathieu', key='66/2077')
        >>> publications = sorted(DBLP.from_author(fabien),
        ...                 key=lambda p: p.title)
        >>> publications[0] # doctest: +NORMALIZE_WHITESPACE
        DBLPPublication(title='Achievable catalog size in peer-to-peer video-on-demand systems.',
        authors=[DBLPAuthor(name='Yacine Boufkhad', key='75/5742'), DBLPAuthor(name='Fabien Mathieu', key='66/2077'),
        DBLPAuthor(name='Fabien de Montgolfier', key='57/6313'), DBLPAuthor(name='Diego Perino', key='03/3645'),
        DBLPAuthor(name='Laurent Viennot', key='v/LaurentViennot')],
        venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08',
        url='https://dblp.org/rec/conf/iptps/BoufkhadMMPV08.html', pages=4)
        >>> publications[-1] # doctest: +NORMALIZE_WHITESPACE
        DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
        authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
        key='conf/sss/Mathieu07', url='https://dblp.org/rec/conf/sss/Mathieu07.html', pages='372-382')
        """
        xml = get(f"https://dblp.org/pid/{a.key}.xml")
        tree = Soup(xml, features="xml")
        if wait:
            sleep(cls.author_backoff)
        publications = []
        for record in tree("r"):
            publication = DBLPPublication.from_soup(record)
            if publication.authors:
                publications.append(publication)
        return publications
100
+
101
+
102
@dataclass(repr=False)
class DBLPAuthor(Author, DBLP):
    """
    Author as known by DBLP.
    """

    # DBLP person id, e.g. '66/2077'.
    key: str
    # Alternative spellings of the name.
    aliases: list = field(default_factory=list)

    @property
    def url(self):
        """
        Returns
        -------
        :class:`str`
            Person page when a key is set, otherwise a DBLP search on the name.
        """
        if not self.key:
            return f"https://dblp.org/search?q={quote_plus(self.name)}"
        return f"https://dblp.org/pid/{self.key}.html"

    def get_publications(self, wait=False):
        """
        Parameters
        ----------
        wait: :class:`bool`
            Forwarded to :meth:`DBLP.from_author`.

        Returns
        -------
        :class:`list`
            Publications of this author found in DBLP.
        """
        return DBLP.from_author(a=self, wait=wait)
115
+
116
+
117
# Maps a DBLP record kind (its `publtype` attribute or, failing that, its
# XML tag name) to the publication type stored on DBLPPublication.
DBLP_TYPES = dict(
    article="journal",
    inproceedings="conference",
    proceedings="book",
    informal="report",
    phdthesis="thesis",
    habil="hdr",
    software="software",
)
126
+
127
+
128
@dataclass(repr=False)
class DBLPPublication(Publication, DBLP):
    """
    Publication as described by a DBLP record.
    """

    # DBLP record key, e.g. 'conf/sss/Mathieu07'.
    key: str
    url: str = None
    # Stored as an int when the text is a plain number, as a string otherwise.
    pages: str = None
    volume: int = None
    number: int = None

    @classmethod
    def from_soup(cls, soup):
        """
        Build a publication from one `<r>` element of a DBLP person page.

        Parameters
        ----------
        soup: :class:`bs4.element.Tag`
            Wrapper element; its first child tag is the actual record.

        Returns
        -------
        :class:`DBLPPublication`
        """
        p = soup.find()
        # `publtype` (e.g. "informal") takes precedence over the tag name.
        typ = p.get("publtype", p.name)
        res = {
            "type": DBLP_TYPES.get(typ, typ),
            "key": p["key"],
            "url": f"https://dblp.org/rec/{p['key']}.html",
        }
        text_tags = ("title", "booktitle", "journal")
        numeric_tags = ("pages", "year", "volume", "number")
        for tag in text_tags + numeric_tags:
            t = p.find(tag)
            if t is None:
                continue
            res[tag] = t.text
            # Only fields that are numeric by nature are converted, so that a
            # title made of digits only stays a string.
            if tag in numeric_tags:
                try:
                    res[tag] = int(t.text)
                except ValueError:
                    # E.g. a page range such as '372-382': keep the text.
                    pass
        # Venue: book title first, then journal, else "unpublished".
        res["venue"] = res.get("booktitle", res.get("journal", "unpublished"))
        res["authors"] = [DBLPAuthor(key=a["pid"], name=a.text) for a in p("author")]
        # Drop entries (booktitle, journal) that are not dataclass fields.
        return cls(**{k: v for k, v in res.items() if k in cls.__match_args__})
gismap/sources/hal.py ADDED
@@ -0,0 +1,272 @@
1
+ from typing import ClassVar
2
+ from dataclasses import dataclass, field
3
+ from collections import defaultdict
4
+ from urllib.parse import quote_plus
5
+ import json
6
+
7
+ from gismap.sources.models import DB, Publication, Author # DBAuthor, DBPublication
8
+ from gismap.utils.text import clean_aliases
9
+ from gismap.utils.requests import get
10
+ from gismap.utils.common import unlist
11
+
12
+
13
@dataclass(repr=False)
class HAL(DB):
    """
    Entry point to the HAL open archive, queried through
    api.archives-ouvertes.fr.
    """

    db_name: ClassVar[str] = "hal"

    @classmethod
    def search_author(cls, name):
        """
        Look for HAL author entries matching a name.

        Entries with an idHal are preferred, then entries with a person id.
        Plain full names are only returned when none of the former exists.

        Parameters
        ----------
        name: :class:`str`
            Name of the person to look for.

        Returns
        -------
        :class:`list`
            Potential matches, as :class:`HALAuthor` objects carrying the searched `name`.

        Examples
        --------

        >>> fabien = HAL.search_author("Fabien Mathieu")
        >>> fabien
        [HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')]
        >>> fabien = fabien[0]
        >>> fabien.url
        'https://hal.science/search/index/?q=*&authIdHal_s=fabien-mathieu'
        >>> HAL.search_author("Laurent Viennot")[0]
        HALAuthor(name='Laurent Viennot', key='laurentviennot')
        >>> HAL.search_author("NotaSearcherName")
        []
        >>> HAL.search_author("Ana Busic")
        [HALAuthor(name='Ana Busic', key='anabusic')]
        >>> HAL.search_author("Potop-Butucaru Maria") # doctest: +NORMALIZE_WHITESPACE
        [HALAuthor(name='Potop-Butucaru Maria', key='858256', key_type='pid'),
        HALAuthor(name='Potop-Butucaru Maria', key='841868', key_type='pid')]
        >>> diego = HAL.search_author("Diego Perino")
        >>> diego # doctest: +NORMALIZE_WHITESPACE
        [HALAuthor(name='Diego Perino', key='847558', key_type='pid'),
        HALAuthor(name='Diego Perino', key='978810', key_type='pid')]
        >>> diego[1].url
        'https://hal.science/search/index/?q=*&authIdPerson_i=978810'
        """
        wanted = ("label_s", "idHal_s", "person_i", "fullName_s")
        query = {"q": name, "fl": ",".join(wanted), "wt": "json"}
        raw = get("https://api.archives-ouvertes.fr/ref/author/", params=query)
        docs = json.loads(raw)["response"].get("docs", [])

        by_idhal = defaultdict(set)  # idHal -> labels seen for it
        by_pid = defaultdict(set)  # person id -> labels seen for it
        fullnames = set()
        for doc in docs:
            if "label_s" not in doc:
                continue
            label = doc.get("label_s")
            if "idHal_s" in doc:
                by_idhal[doc["idHal_s"]].add(label)
            elif "person_i" in doc:
                by_pid[doc["person_i"]].add(label)
            elif "fullName_s" in doc:
                fullnames.add(doc["fullName_s"])

        matches = [
            HALAuthor(name=name, key=hid, aliases=clean_aliases(name, labels))
            for hid, labels in by_idhal.items()
        ]
        matches.extend(
            HALAuthor(
                name=name,
                key=str(pid),
                aliases=clean_aliases(name, labels),
                key_type="pid",
            )
            for pid, labels in by_pid.items()
        )
        if matches:
            return matches
        # NOTE(review): clean_aliases receives a single string here but a
        # collection in the calls above -- confirm it handles both.
        return [
            HALAuthor(
                name=name,
                key=fullname,
                aliases=clean_aliases(name, fullname),
                key_type="fullname",
            )
            for fullname in fullnames
        ]

    @classmethod
    def from_author(cls, a):
        """
        Fetch the publications of a HAL author.

        Parameters
        ----------
        a: :class:`~gismap.sources.hal.HALAuthor`
            Hal researcher. The query depends on its `key_type`:
            person id ("pid"), full name ("fullname"), or idHal (anything else).

        Returns
        -------
        :class:`list`
            Papers available in HAL.

        Raises
        ------
        ValueError
            If the author has no key.

        Examples
        --------

        >>> fabien = HAL.search_author("Fabien Mathieu")[0]
        >>> publications = sorted(fabien.get_publications(), key=lambda p: p.title)
        >>> publications[2] # doctest: +NORMALIZE_WHITESPACE
        HALPublication(title='Achievable Catalog Size in Peer-to-Peer Video-on-Demand Systems',
        authors=[HALAuthor(name='Yacine Boufkhad', key='yacine-boufkhad'),
        HALAuthor(name='Fabien Mathieu', key='fabien-mathieu'),
        HALAuthor(name='Fabien de Montgolfier', key='949013', key_type='pid'),
        HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname'),
        HALAuthor(name='Laurent Viennot', key='laurentviennot')],
        venue='Proceedings of the 7th Internnational Workshop on Peer-to-Peer Systems (IPTPS)', type='conference',
        year=2008, key='471724', url='https://inria.hal.science/inria-00471724v1')
        >>> diego = publications[2].authors[3]
        >>> diego
        HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname')
        >>> len(diego.get_publications())
        28
        >>> publications[-7] # doctest: +NORMALIZE_WHITESPACE
        HALPublication(title='Upper bounds for stabilization in acyclic preference-based systems',
        authors=[HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')],
        venue="SSS'07 - 9th international conference on Stabilization, Safety, and Security of Distributed Systems",
        type='conference', year=2007, key='668356', url='https://inria.hal.science/hal-00668356v1')

        Case of someone with several ids that one may want to combine:

        >>> maria = HAL.search_author('Maria Potop-Butucaru')
        >>> maria # doctest: +NORMALIZE_WHITESPACE
        [HALAuthor(name='Maria Potop-Butucaru', key='858256', key_type='pid'),
        HALAuthor(name='Maria Potop-Butucaru', key='841868', key_type='pid')]
        >>> len(HAL.from_author(maria[0]))
        26
        >>> len(maria[1].get_publications())
        123

        Note: an error is raised if not enough data is provided

        >>> HAL.from_author(HALAuthor('Fabien Mathieu'))
        Traceback (most recent call last):
        ...
        ValueError: HALAuthor(name='Fabien Mathieu') must have a key for publications to be fetched.
        """
        if a.key is None:
            raise ValueError(f"{a} must have a key for publications to be fetched.")
        params = {
            "fl": [
                "docid",
                "abstract_s",
                "label_s",
                "uri_s",
                "*Title_s",
                "title_s",
                "producedDateY_i",
                "auth_s",
                "authFullNamePersonIDIDHal_fs",
                "docType_s",
            ],
            "rows": 2000,
            "wt": "json",
        }
        # One query template per kind of key; idHal is the default.
        templates = {"pid": "authIdPerson_i:{}", "fullname": 'authFullName_s:"{}"'}
        params["q"] = templates.get(a.key_type, "authIdHal_s:{}").format(a.key)
        raw = get("https://api.archives-ouvertes.fr/search/", params=params)
        docs = json.loads(raw)["response"].get("docs", [])
        return [HALPublication.from_json(doc) for doc in docs]
176
+
177
+
178
@dataclass(repr=False)
class HALAuthor(Author, HAL):
    """
    Author as known by HAL.
    """

    # Identifier; its meaning is given by key_type.
    key: str | int = None
    # "pid" (person id), "fullname", or None (key is then used as an idHal).
    key_type: str = None
    # Alternative spellings of the name.
    aliases: list = field(default_factory=list)

    @property
    def url(self):
        """
        Returns
        -------
        :class:`str`
            HAL search page for this author, built according to `key_type`.
        """
        if self.key_type == "pid":
            return f"https://hal.science/search/index/?q=*&authIdPerson_i={self.key}"
        if self.key_type == "fullname":
            return f"https://hal.science/search/index?q={quote_plus(self.name)}"
        return f"https://hal.science/search/index/?q=*&authIdHal_s={self.key}"

    def get_publications(self):
        """
        Returns
        -------
        :class:`list`
            Publications of this author found in HAL.
        """
        return HAL.from_author(a=self)
195
+
196
+
197
def parse_facet_author(a):
    """
    Turn a HAL author facet into an author object.

    The facet is split on "_FacetSep_" into exactly three parts:
    name, person id, idHal.

    Parameters
    ----------
    a: :class:`str`
        Hal facet of author

    Returns
    -------
    :class:`~gismap.sources.hal.HALAuthor`
        Keyed by idHal if present, else by a non-zero person id, else by full name.
    """
    name, pid, hid = a.split("_FacetSep_")
    if hid:
        return HALAuthor(name=name, key=hid)
    # A person id of 0 is treated like a missing one.
    if pid and int(pid):
        return HALAuthor(name=name, key=pid, key_type="pid")
    return HALAuthor(name=name, key=name, key_type="fullname")
217
+
218
+
219
# Maps a HAL document type (docType_s) to the publication type stored on
# HALPublication; unlisted types are lower-cased by HALPublication.from_json.
HAL_TYPES = dict(
    ART="journal",
    COMM="conference",
    OUV="book",
    COUV="chapter",
    THESE="thesis",
    UNDEFINED="report",
)

# Maps HAL response fields to the names used when building a HALPublication.
HAL_KEYS = dict(
    title_s="title",
    abstract_s="abstract",
    docid="key",
    bookTitle_s="booktitle",
    conferenceTitle_s="conference",
    journalTitle_s="journal",
    docType_s="type",
    producedDateY_i="year",
    uri_s="url",
)
239
+
240
+
241
@dataclass(repr=False)
class HALPublication(Publication, HAL):
    """
    Publication as described by a HAL document.
    """

    # HAL docid.
    key: str
    abstract: str = None
    url: str = None

    @classmethod
    def from_json(cls, r):
        """
        Build a publication from one document of a HAL search response.

        Parameters
        ----------
        r: :class:`dict`
            De-serialized JSON.

        Returns
        -------
        :class:`~gismap.sources.hal.HALPublication`

        """
        res = {}
        for hal_field, attribute in HAL_KEYS.items():
            if hal_field in r:
                # List values are reduced to their first element.
                res[attribute] = unlist(r[hal_field])
        facets = r.get("authFullNamePersonIDIDHal_fs", [])
        res["authors"] = [parse_facet_author(facet) for facet in facets]
        # Venue: first available among book title, journal, conference.
        candidates = (
            res[tag] for tag in ("booktitle", "journal", "conference") if tag in res
        )
        res["venue"] = next(candidates, "unpublished")
        raw_type = res["type"]
        res["type"] = HAL_TYPES.get(raw_type, raw_type.lower())
        # Drop entries (booktitle, journal, conference) that are not dataclass fields.
        kwargs = {k: v for k, v in res.items() if k in cls.__match_args__}
        return cls(**kwargs)
@@ -0,0 +1,31 @@
1
+ from dataclasses import dataclass
2
+ from typing import ClassVar
3
+
4
+ from gismap.utils.common import LazyRepr
5
+
6
+
7
@dataclass(repr=False)
class Author(LazyRepr):
    """
    Bare-bones author: only a name. The repr comes from LazyRepr.
    """

    name: str
10
+
11
+
12
@dataclass(repr=False)
class Publication(LazyRepr):
    """
    Fields shared by all publications. The repr comes from LazyRepr.
    """

    title: str
    # Author objects, in the order given by the source.
    authors: list
    venue: str
    # Kind of publication, e.g. "journal" or "conference".
    type: str
    year: int
19
+
20
+
21
@dataclass(repr=False)
class DB(LazyRepr):
    """
    Interface of a bibliographic database.

    Concrete databases set `db_name` and override both class methods.
    """

    db_name: ClassVar[str] = None

    @classmethod
    def search_author(cls, name):
        """
        Parameters
        ----------
        name: :class:`str`
            Name of the person to look for.

        Raises
        ------
        NotImplementedError
            Always; to be overridden.
        """
        raise NotImplementedError()

    @classmethod
    def from_author(cls, a):
        """
        Parameters
        ----------
        a: :class:`Author`
            Author whose publications are wanted.

        Raises
        ------
        NotImplementedError
            Always; to be overridden.
        """
        raise NotImplementedError()
@@ -0,0 +1,135 @@
1
+ from dataclasses import dataclass, field
2
+ from bof.fuzz import Process
3
+ import numpy as np
4
+
5
+ from gismap.sources.models import Publication, Author
6
+ from gismap.utils.text import clean_aliases
7
+
8
+
9
# Per-attribute scores used by SourcedPublication.score_source to rank the
# copies of a publication: for each attribute, the value found on a source
# is looked up here (0 if absent). The order of the attributes matters, as
# the scores are compared as a tuple.
score_rosetta = dict(
    db_name=dict(dblp=1, hal=2),
    venue={"CoRR": -1, "unpublished": -2},
    type=dict(conference=1, journal=2),
)
14
+
15
+
16
@dataclass(repr=False)
class SourcedAuthor(Author):
    """
    Author backed by one or several database-specific authors.
    """

    # Database-specific authors (each exposes key, name, aliases, get_publications).
    sources: list = field(default_factory=list)

    @property
    def key(self):
        """Key of the first source, or None without sources."""
        return self.sources[0].key if self.sources else None

    @property
    def aliases(self):
        """Names and aliases of all sources, cleaned against this author's name."""
        if not self.sources:
            return []
        names = []
        for source in self.sources:
            names += [source.name] + source.aliases
        return clean_aliases(self.name, names)

    @classmethod
    def from_sources(cls, sources):
        """
        Parameters
        ----------
        sources: :class:`list`
            Database-specific authors; the first one gives the name.

        Returns
        -------
        :class:`SourcedAuthor`
        """
        first = sources[0]
        return cls(name=first.name, sources=sources)

    def get_publications(self, clean=True):
        """
        Gather the publications of all sources.

        Parameters
        ----------
        clean: :class:`bool`
            If True, replace matching authors by this author and merge the
            copies of a same publication.

        Returns
        -------
        :class:`dict`
            Publications, indexed by key.
        """
        publications = {}
        for source in self.sources:
            for publication in source.get_publications():
                publications[publication.key] = publication
        if not clean:
            return publications
        regroup_authors({self.key: self}, publications)
        return regroup_publications(publications)
47
+
48
+
49
@dataclass(repr=False)
class SourcedPublication(Publication):
    """
    Publication backed by one or several database-specific publications.
    """

    key: str
    # Database-specific copies, best-scored first when built by from_sources.
    sources: list = field(default_factory=list)

    @classmethod
    def from_sources(cls, sources):
        """
        Build a publication from its copies; the best-scored copy is the reference.

        Parameters
        ----------
        sources: :class:`list`
            Copies of the same publication.

        Returns
        -------
        :class:`SourcedPublication`
        """
        ranked = sorted(sources, key=cls.score_source, reverse=True)
        best = ranked[0]
        init_kwargs = {
            attr: getattr(best, attr)
            for attr in best.__dict__
            if attr in cls.__match_args__
        }
        publication = cls(**init_kwargs, sources=ranked)
        # Attributes of the reference that are not dataclass fields here
        # (e.g. url) are copied as plain attributes.
        for attr, value in best.__dict__.items():
            if attr in cls.__match_args__:
                continue
            setattr(publication, attr, value)
        return publication

    @staticmethod
    def score_source(source):
        """
        Parameters
        ----------
        source: :class:`Publication`
            Copy to evaluate.

        Returns
        -------
        :class:`tuple`
            One score per entry of `score_rosetta`, followed by the year.
        """
        scores = [
            table.get(getattr(source, attr, None), 0)
            for attr, table in score_rosetta.items()
        ]
        return (*scores, source.year)
72
+
73
+
74
def regroup_authors(auth_dict, pub_dict):
    """
    Swap the authors of publications for their unified counterpart.
    Typical use: upgrade DB-specific authors to multisource authors.

    The publications are modified in place. An author of a publication is
    replaced when its key equals the key, name, or an alias of one of the
    sources of a unified author.

    Parameters
    ----------
    auth_dict: :class:`dict`
        Authors to unify.
    pub_dict: :class:`dict`
        Publications to unify.

    Returns
    -------
    None
    """
    redirection = {}
    for author in auth_dict.values():
        for source in author.sources:
            for handle in (source.key, source.name, *source.aliases):
                redirection[handle] = author

    for publication in pub_dict.values():
        unified = []
        for current in publication.authors:
            unified.append(redirection.get(current.key, current))
        publication.authors = unified
101
+
102
+
103
def regroup_publications(pub_dict, threshold=90, length_impact=0.2):
    """
    Puts together copies of the same publication.

    Publications are compared through their titles: each publication not
    grouped yet is merged with all publications whose title similarity to it
    exceeds `threshold`.

    Parameters
    ----------
    pub_dict: :class:`dict`
        Publications to unify.
    threshold: float
        Similarity parameter.
    length_impact: float
        Length impact parameter.

    Returns
    -------
    :class:`dict`
        Unified publications, indexed by key.
    """
    pub_list = list(pub_dict.values())
    if not pub_list:
        # Nothing to compare: do not fit the matcher on an empty corpus.
        return dict()

    p = Process(length_impact=length_impact)
    p.fit([paper.title for paper in pub_list])

    res = dict()
    done = np.zeros(len(pub_list), dtype=bool)
    for i, paper in enumerate(pub_list):
        if done[i]:
            continue
        # Indices of all publications whose title is close enough to this one.
        locs = np.where(p.transform([paper.title])[0, :] > threshold)[0]
        pub = SourcedPublication.from_sources([pub_list[j] for j in locs])
        res[pub.key] = pub
        done[locs] = True
    return res
File without changes
gismap/utils/common.py ADDED
@@ -0,0 +1,60 @@
1
# Attributes that LazyRepr never shows.
HIDDEN_KEYS = {
    "sources",
    "aliases",
    "abstract",
}
2
+
3
+
4
class LazyRepr:
    """
    MixIn giving a compact repr: instance attributes that are falsy
    (None, empty, 0...) or listed in HIDDEN_KEYS are left out.
    """

    def __repr__(self):
        shown = []
        for key, value in self.__dict__.items():
            if not value or key in HIDDEN_KEYS:
                continue
            shown.append(f"{key}={value!r}")
        return f"{type(self).__name__}({', '.join(shown)})"
16
+
17
+
18
def unlist(x):
    """
    Parameters
    ----------
    x: :class:`str` or :class:`list` or :class:`int`
        Any value.

    Returns
    -------
    x: :class:`str` or :class:`int`
        First element if `x` is a non-empty list, `x` itself otherwise
        (empty lists and non-list values are returned unchanged).
    """
    if isinstance(x, list) and x:
        return x[0]
    return x
31
+
32
+
33
def get_classes(root, key="name"):
    """
    Collect the subclasses of `root`, at any depth, indexed by a class attribute.

    Parameters
    ----------
    root: :class:`class`
        Starting class (can be abstract).
    key: :class:`str`, default='name'
        Attribute to look-up

    Returns
    -------
    :class:`dict`
        Dictionaries of all subclasses that have a key attribute (as in class attribute `key`).
        When several classes share the same value, the one found closest to
        `root` is kept, so a class is not replaced by descendants that merely
        inherit its attribute.

    Examples
    --------

    >>> from gismap.sources.models import DB
    >>> subclasses = get_classes(DB, key='db_name')
    >>> dict(sorted(subclasses.items())) # doctest: +NORMALIZE_WHITESPACE
    {'dblp': <class 'gismap.sources.dblp.DBLP'>, 'hal': <class 'gismap.sources.hal.HAL'>}
    """
    children = root.__subclasses__()
    # Classes whose attribute is missing or falsy are skipped.
    result = {getattr(c, key): c for c in children if getattr(c, key, None)}
    for c in children:
        # Look deeper with the same attribute name.
        for value, klass in get_classes(c, key=key).items():
            result.setdefault(value, klass)
    return result
gismap/utils/logger.py ADDED
@@ -0,0 +1,4 @@
1
+ import logging
2
+
3
#: Default logging interface.
logger = logging.getLogger("GisMap")
@@ -0,0 +1,33 @@
1
+ from time import sleep
2
+ import requests
3
+ from gismap.utils.logger import logger
4
+
5
+
6
# HTTP session created once at import time and used by `get` for every request.
session = requests.Session()
7
+
8
+
9
def get(url, params=None):
    """
    Fetch a URL, waiting and retrying for as long as the server answers 429.

    Parameters
    ----------
    url: :class:`str`
        Entry point to fetch.
    params: :class:`dict`, optional
        Get arguments (appended to URL).

    Returns
    -------
    :class:`str`
        Body of the first answer whose status is not 429 (other error
        statuses are not checked).
    """
    while True:
        r = session.get(url, params=params)
        if r.status_code != 429:
            return r.text
        try:
            # Retry-After is read as a number of seconds; a negative value
            # is clamped since sleep() rejects it.
            t = max(int(r.headers["Retry-After"]), 0)
        except (KeyError, ValueError):
            # Header missing, or not an integer (it may also be an HTTP date).
            t = 60
        logger.warning(f"Too many requests. Auto-retry in {t} seconds.")
        sleep(t)