gismap 0.2.2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
gismap/sources/ldb.py ADDED
@@ -0,0 +1,501 @@
+ from dataclasses import dataclass, field
+ from functools import lru_cache
+ from typing import ClassVar
+ from platformdirs import user_data_dir
+ from pathlib import Path
+ from datetime import datetime, timezone
+ import errno
+ import json
+ import os
+
+ import zstandard as zstd
+ import dill as pickle
+ import numpy as np
+ import numba as nb
+ from bof.fuzz import Process
+ from gismo.common import safe_write
+ from tqdm.auto import tqdm
+ import requests
+
+ from gismap.sources.dblp_ttl import publis_streamer
+ from gismap.sources.models import DB, Author, Publication
+ from gismap.utils.logger import logger
+ from gismap.utils.text import asciify
+ from gismap.utils.zlist import ZList
+
+
+ DATA_DIR = Path(user_data_dir(
+     appname="gismap",
+     appauthor=False,
+ ))
+
+ LDB_STEM = "ldb"
+
+ LDB_PATH = DATA_DIR / f"{LDB_STEM}.pkl.zst"
+
+ TTL_URL = "https://dblp.org/rdf/dblp.ttl.gz"
+
+ # GitHub release asset constants
+ GITHUB_REPO = "balouf/gismap"
+ GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}/releases"
+ LDB_ASSET_NAME = "ldb.pkl.zst"
+ LDB_META_PATH = DATA_DIR / "ldb_meta.json"
+
+
+ @dataclass(repr=False)
+ class LDB(DB):
+     """
+     Browse DBLP from a local copy of the database.
+
+     LDB is a class-only database - it should not be instantiated.
+     All methods are classmethods and state is stored in class variables.
+     """
+     db_name: ClassVar[str] = LDB_STEM
+     source: ClassVar[str] = TTL_URL
+
+     # Class-level state (replaces instance attributes)
+     authors: ClassVar[ZList | None] = None
+     publis: ClassVar[ZList | None] = None
+     keys: ClassVar[dict | None] = None
+     search_engine: ClassVar[Process | None] = None
+     _initialized: ClassVar[bool] = False
+
+     __hash__ = object.__hash__
+
+     def __init__(self):
+         raise TypeError(
+             "LDB should not be instantiated. Use class methods directly, e.g., LDB.search_author(name)"
+         )
+
+     @classmethod
+     def _ensure_loaded(cls):
+         """Lazy-load the database if not already loaded."""
+         if cls._initialized:
+             return
+         if LDB_PATH.exists():
+             cls.load_db()
+         else:
+             logger.info("LDB not found locally. Attempting to retrieve from GitHub...")
+             try:
+                 cls.retrieve()
+                 cls.load_db()
+             except RuntimeError as e:
+                 logger.warning(f"Could not auto-retrieve LDB: {e}")
+
+     @classmethod
+     def build_db(cls, source=None, limit=None, n_range=2, length_impact=.1, authors_frame=512, publis_frame=256):
+         if source is None:
+             source = cls.source
+         authors_dict = dict()
+         logger.info("Retrieve publications")
+         with ZList(frame_size=publis_frame) as publis:
+             for i, (key, title, typ, authors, url, streams, pages, venue, year) in enumerate(publis_streamer(source)):
+                 auth_indices = []
+                 for auth_key, auth_name in authors.items():
+                     if auth_key not in authors_dict:
+                         authors_dict[auth_key] = (len(authors_dict), auth_name, [i])
+                     else:
+                         authors_dict[auth_key][2].append(i)
+                     auth_indices.append(authors_dict[auth_key][0])
+                 publis.append((key, title, typ, auth_indices, url, streams, pages, venue, year))
+                 if i == limit:
+                     break
+         cls.publis = publis
+         logger.info(f"{len(publis)} publications retrieved.")
+         logger.info("Compact authors")
+         with ZList(frame_size=authors_frame) as authors:
+             for key, (_, name, pubs) in tqdm(authors_dict.items()):
+                 authors.append((key, name, pubs))
+         cls.authors = authors
+         cls.keys = {k: v[0] for k, v in authors_dict.items()}
+         del authors_dict
+         cls.search_engine = Process(n_range=n_range, length_impact=length_impact)
+         cls.search_engine.fit([asciify(a[1]) for a in authors])
+         cls.search_engine.choices = np.arange(len(authors))
+         cls.search_engine.vectorizer.features_ = cls.numbify_dict(cls.search_engine.vectorizer.features_)
+         logger.info(f"{len(cls.authors)} compacted.")
+         cls._invalidate_cache()
+         cls._initialized = True
+
+     @classmethod
+     @lru_cache(maxsize=50000)
+     def author_by_index(cls, i):
+         key, name, _ = cls.authors[i]
+         return LDBAuthor(key=key, name=name)
+
+     @classmethod
+     def author_by_key(cls, key):
+         return cls.author_by_index(cls.keys[key])
+
+     @classmethod
+     @lru_cache(maxsize=50000)
+     def publication_by_index(cls, i):
+         key, title, typ, authors, url, streams, pages, venue, year = cls.publis[i]
+         if venue is None:
+             venue = "unpublished"
+         return {"key": key, "title": title, "type": typ,
+                 "authors": authors,
+                 "url": url, "streams": streams, "pages": pages,
+                 "venue": venue, "year": year}
+
+     @classmethod
+     def author_publications(cls, key):
+         cls._ensure_loaded()
+         _, name, pubs = cls.authors[cls.keys[key]]
+         pubs = [cls.publication_by_index(k).copy() for k in pubs]
+         auth_ids = sorted({k for p in pubs for k in p["authors"]})
+         auths = {k: cls.author_by_index(k) for k in auth_ids}
+         for pub in pubs:
+             pub["authors"] = [auths[k] for k in pub["authors"]]
+             metadata = dict()
+             for k in ["url", "streams", "pages"]:
+                 v = pub.pop(k)
+                 if v is not None:
+                     metadata[k] = v
+             pub["metadata"] = metadata
+         return [LDBPublication(**pub) for pub in pubs]
+
+     @classmethod
+     @lru_cache(maxsize=1000)
+     def search_author(cls, name, limit=2, score_cutoff=40.0, slack=10.0):
+         cls._ensure_loaded()
+         res = cls.search_engine.extract(asciify(name), limit=limit, score_cutoff=score_cutoff)
+         res = [r[0] for r in res if r[1] > res[0][1] - slack]
+         sorted_ids = {i: cls.author_by_index(i) for i in sorted(res)}
+         return [sorted_ids[i] for i in res]
+
+     @classmethod
+     def _invalidate_cache(cls):
+         cls.search_author.cache_clear()
+         cls.publication_by_index.cache_clear()
+         cls.author_by_index.cache_clear()
+
+     @classmethod
+     def from_author(cls, a):
+         return cls.author_publications(a.key)
+
+     @classmethod
+     def _get_release_info(cls, tag: str | None = None) -> dict:
+         """
+         Fetch release metadata from GitHub API.
+
+         Parameters
+         ----------
+         tag: :class:`str`, optional
+             Specific release tag (e.g., "v0.4.0"). If None, fetches latest.
+
+         Returns
+         -------
+         :class:`dict`
+             Release metadata including tag_name and assets.
+
+         Raises
+         ------
+         :class:`RuntimeError`
+             If release not found or API request fails.
+         """
+         if tag is None:
+             url = f"{GITHUB_API_URL}/latest"
+         else:
+             url = f"{GITHUB_API_URL}/tags/{tag}"
+
+         try:
+             response = requests.get(url, timeout=30)
+             response.raise_for_status()
+             return response.json()
+         except requests.exceptions.HTTPError as e:
+             if response.status_code == 404:
+                 raise RuntimeError(f"Release not found: {tag or 'latest'}") from e
+             raise RuntimeError(f"GitHub API error: {e}") from e
+         except requests.exceptions.RequestException as e:
+             raise RuntimeError(f"Network error fetching release info: {e}") from e
+
+     @classmethod
+     def _download_file(cls, url: str, dest: Path, desc: str = "Downloading"):
+         """
+         Download file with progress bar.
+
+         Parameters
+         ----------
+         url : str
+             URL to download from.
+         dest : Path
+             Destination file path.
+         desc : str
+             Description for progress bar.
+         """
+         dest.parent.mkdir(parents=True, exist_ok=True)
+
+         response = requests.get(url, stream=True, timeout=30)
+         response.raise_for_status()
+
+         total_size = int(response.headers.get('content-length', 0))
+
+         with open(dest, 'wb') as f, tqdm(
+             desc=desc,
+             total=total_size,
+             unit='B',
+             unit_scale=True,
+             unit_divisor=1024,
+         ) as pbar:
+             for chunk in response.iter_content(chunk_size=8192):
+                 if chunk:
+                     f.write(chunk)
+                     pbar.update(len(chunk))
+
+     @classmethod
+     def _save_meta(cls, tag: str, url: str, size: int):
+         """Save version metadata to JSON file."""
+         meta = {
+             "tag": tag,
+             "url": url,
+             "size": size,
+             "downloaded_at": datetime.now(timezone.utc).isoformat(),
+         }
+         LDB_META_PATH.parent.mkdir(parents=True, exist_ok=True)
+         with open(LDB_META_PATH, 'w') as f:
+             json.dump(meta, f, indent=2)
+
+     @classmethod
+     def _load_meta(cls) -> dict | None:
+         """Load version metadata from JSON file."""
+         if not LDB_META_PATH.exists():
+             return None
+         try:
+             with open(LDB_META_PATH, 'r') as f:
+                 return json.load(f)
+         except (json.JSONDecodeError, IOError):
+             return None
+
+     @classmethod
+     def retrieve(cls, version: str | None = None, force: bool = False):
+         """
+         Download LDB database from GitHub releases.
+
+         Parameters
+         ----------
+         version: :class:`str`, optional
+             Specific release version (e.g., "v0.4.0" or "0.4.0").
+             If None, downloads from latest release.
+         force: :class:`bool`, default=False
+             Download even if same version is installed.
+
+         Examples
+         --------
+         >> LDB.retrieve()  # Latest release (freshest data)
+         >> LDB.retrieve("v0.4.0")  # Specific version
+         >> LDB.retrieve("0.4.0")  # Also works without 'v' prefix
+
+         Raises
+         ------
+         RuntimeError
+             If release or asset not found, or download fails.
+         """
+         # Normalize version string (add "v" prefix if missing)
+         tag = None
+         if version is not None:
+             tag = version if version.startswith("v") else f"v{version}"
+
+         # Fetch release info
+         logger.info(f"Fetching release info for: {tag or 'latest'}")
+         release_info = cls._get_release_info(tag)
+         release_tag = release_info["tag_name"]
+
+         # Check if already installed (unless force=True)
+         if not force:
+             meta = cls._load_meta()
+             if meta and meta.get("tag") == release_tag and LDB_PATH.exists():
+                 logger.info(f"LDB version {release_tag} already installed. Use force=True to re-download.")
+                 return
+
+         # Find ldb.pkl.zst asset in release
+         assets = release_info.get("assets", [])
+         ldb_asset = None
+         for asset in assets:
+             if asset["name"] == LDB_ASSET_NAME:
+                 ldb_asset = asset
+                 break
+
+         if ldb_asset is None:
+             raise RuntimeError(
+                 f"Asset '{LDB_ASSET_NAME}' not found in release {release_tag}. "
+                 f"Available assets: {[a['name'] for a in assets]}"
+             )
+
+         download_url = ldb_asset["browser_download_url"]
+         asset_size = ldb_asset["size"]
+
+         logger.info(f"Downloading LDB from release {release_tag} ({asset_size / 1e9:.2f} GB)")
+
+         # Download with progress bar
+         cls._download_file(download_url, LDB_PATH, desc=f"LDB {release_tag}")
+
+         # Save version metadata
+         cls._save_meta(release_tag, download_url, asset_size)
+
+         # Reset initialized flag so next access reloads
+         cls._initialized = False
+         cls._invalidate_cache()
+
+         logger.info(f"LDB {release_tag} successfully installed to {LDB_PATH}")
+
+     @classmethod
+     def db_info(cls) -> dict | None:
+         """
+         Return installed version info.
+
+         Returns
+         -------
+         :class:`dict` or :class:`None`
+             Dictionary with tag, date, size, path; or None if not installed.
+         """
+         meta = cls._load_meta()
+         if meta is None or not LDB_PATH.exists():
+             return None
+
+         return {
+             "tag": meta.get("tag"),
+             "downloaded_at": meta.get("downloaded_at"),
+             "size": meta.get("size"),
+             "path": str(LDB_PATH),
+         }
+
+     @classmethod
+     def check_update(cls) -> dict | None:
+         """
+         Check if a newer version is available on GitHub.
+
+         Returns
+         -------
+         :class:`dict` or None
+             Dictionary with update info if available, None if up to date.
+         """
+         try:
+             release_info = cls._get_release_info()
+             latest_tag = release_info["tag_name"]
+
+             meta = cls._load_meta()
+             current_tag = meta.get("tag") if meta else None
+
+             if current_tag == latest_tag:
+                 logger.info(f"LDB is up to date: {current_tag}")
+                 return None
+
+             return {
+                 "current": current_tag,
+                 "latest": latest_tag,
+                 "message": f"Update available: {current_tag or 'not installed'} -> {latest_tag}",
+             }
+         except RuntimeError as e:
+             logger.warning(f"Could not check for updates: {e}")
+             return None
+
+     @classmethod
+     def dump(cls, filename: str, path=".", overwrite=False):
+         """Save class state to file."""
+         # Convert numba dict to regular dict for pickling
+         nb_dict = None
+         if cls.search_engine is not None:
+             nb_dict = cls.search_engine.vectorizer.features_
+             cls.search_engine.vectorizer.features_ = dict(nb_dict)
+
+         state = {
+             'authors': cls.authors,
+             'publis': cls.publis,
+             'keys': cls.keys,
+             'search_engine': cls.search_engine,
+         }
+
+         # Use safe_write pattern from gismo.common
+         destination = Path(path) / f"{Path(filename).stem}.pkl.zst"
+         if destination.exists() and not overwrite:
+             print(f"File {destination} already exists! Use overwrite option to overwrite.")
+         else:
+             with safe_write(destination) as f:
+                 cctx = zstd.ZstdCompressor(level=3)
+                 with cctx.stream_writer(f) as z:
+                     pickle.dump(state, z, protocol=5)
+
+         # Restore numba dict
+         if cls.search_engine is not None:
+             cls.search_engine.vectorizer.features_ = nb_dict
+
+     @classmethod
+     def load(cls, filename: str, path="."):
+         """Load class state from file."""
+         dest = Path(path) / f"{Path(filename).stem}.pkl.zst"
+         if not dest.exists():
+             dest = dest.with_suffix(".pkl")
+             if not dest.exists():
+                 raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), dest)
+
+         dctx = zstd.ZstdDecompressor()
+         with open(dest, "rb") as f, dctx.stream_reader(f) as z:
+             state = pickle.load(z)
+
+         cls.authors = state['authors']
+         cls.publis = state['publis']
+         cls.keys = state['keys']
+         cls.search_engine = state['search_engine']
+
+         if cls.search_engine is not None:
+             cls.search_engine.vectorizer.features_ = cls.numbify_dict(
+                 cls.search_engine.vectorizer.features_
+             )
+
+         cls._invalidate_cache()
+         cls._initialized = True
+
+     @classmethod
+     def dump_db(cls):
+         DATA_DIR.mkdir(parents=True, exist_ok=True)
+         cls.dump(LDB_STEM, path=DATA_DIR, overwrite=True)
+
+     @classmethod
+     def load_db(cls):
+         try:
+             cls.load(LDB_STEM, path=DATA_DIR)
+         except FileNotFoundError:
+             logger.warning("No LDB installed. Build or retrieve before using.")
+
+     @staticmethod
+     def delete_db():
+         if LDB_PATH.exists():
+             LDB_PATH.unlink()
+
+     @staticmethod
+     def numbify_dict(input_dict):
+         nb_dict = nb.typed.Dict.empty(key_type=nb.types.unicode_type, value_type=nb.types.int64)
+         for k, v in input_dict.items():
+             nb_dict[k] = v
+         return nb_dict
+
+
+ @dataclass(repr=False)
+ class LDBAuthor(Author, LDB):
+     key: str
+     aliases: list = field(default_factory=list)
+
+     @property
+     def url(self):
+         return f"https://dblp.org/pid/{self.key}.html"
+
+     def get_publications(self):
+         return LDB.from_author(self)
+
+
+
+ @dataclass(repr=False)
+ class LDBPublication(Publication, LDB):
+     key: str
+     metadata: dict = field(default_factory=dict)
+
+     @property
+     def url(self):
+         return self.metadata.get("url", f"https://dblp.org/rec/{self.key}.html")
+
+     @property
+     def stream(self):
+         if "streams" in self.metadata:
+             return f'https://dblp.org/streams/{self.metadata["streams"][0]}'
+         return None
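
A minimal usage sketch of the class-level API that ldb.py adds (not part of the diff; it assumes network access for the first call, and the author name is a placeholder):

from gismap.sources.ldb import LDB

LDB.retrieve()  # fetch the prebuilt database from the latest GitHub release
hits = LDB.search_author("Jane Doe")  # fuzzy lookup; lazy-loads the database if needed
for author in hits:
    for pub in author.get_publications():
        print(pub.key, pub.url)

print(LDB.db_info())       # installed tag, download date, size, path; None if absent
print(LDB.check_update())  # None when up to date, otherwise current/latest tags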
gismap/sources/models.py CHANGED
@@ -29,3 +29,10 @@ class DB(LazyRepr):
      @classmethod
      def from_author(cls, a):
          raise NotImplementedError
+
+
+ def db_class_to_auth_class(db_class):
+     for subclass in Author.__subclasses__():
+         if db_class in subclass.__mro__:
+             return subclass
+     return None
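
A sketch of what the new helper resolves, assuming gismap.sources.ldb has been imported so that LDBAuthor is registered as an Author subclass:

from gismap.sources.ldb import LDB, LDBAuthor
from gismap.sources.models import db_class_to_auth_class

# LDBAuthor subclasses both Author and LDB, so LDB sits in its MRO.
assert db_class_to_auth_class(LDB) is LDBAuthor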
gismap/sources/multi.py CHANGED
@@ -1,5 +1,6 @@
  from dataclasses import dataclass, field
- from bof.fuzz import Process
+ from bof.fuzz import jit_square_factors
+ from bof.feature_extraction import CountVectorizer
  import numpy as np
 
  from gismap.sources.models import Publication, Author
@@ -14,7 +15,7 @@ def score_author_source(dbauthor):
              return 2
          else:
              return 3
-     elif dbauthor.db_name == "dblp":
+     elif dbauthor.db_name in ["dblp", "ldb"]:
          return 1
      else:
          return 0
@@ -52,6 +53,8 @@ class SourcedAuthor(Author):
      def get_publications(self, clean=True, selector=None):
          if selector is None:
              selector = []
+         if not isinstance(selector, list):
+             selector = [selector]
          res = {
              p.key: p
              for a in self.sources
@@ -66,7 +69,7 @@ class SourcedAuthor(Author):
 
 
  publication_score_rosetta = {
-     "db_name": {"dblp": 1, "hal": 2},
+     "db_name": {"dblp": 1, "ldb": 1, "hal": 2},
      "venue": {"CoRR": -1, "unpublished": -2},
      "type": {"conference": 1, "journal": 2},
  }
@@ -135,7 +138,9 @@ def regroup_authors(auth_dict, pub_dict):
      }
 
      for pub in pub_dict.values():
-         pub.authors = [redirection.get(a.key, a) for a in pub.authors]
+         pub.authors = [
+             redirection.get(a.key, redirection.get(a.name, a)) for a in pub.authors
+         ]
 
 
  def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
@@ -156,19 +161,22 @@ def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
      :class:`dict`
          Unified publications.
      """
+     if len(pub_dict) == 0:
+         return dict()
      pub_list = [p for p in pub_dict.values()]
      res = dict()
- 
-     if pub_list:
-         p = Process(length_impact=length_impact, n_range=n_range)
-         p.fit([paper.title for paper in pub_list])
- 
-         done = np.zeros(len(pub_list), dtype=bool)
-         for i, paper in enumerate(pub_list):
-             if done[i]:
-                 continue
-             locs = np.where(p.transform([paper.title])[0, :] > threshold)[0]
-             pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
-             res[pub.key] = pub
-             done[locs] = True
+     vectorizer = CountVectorizer(n_range=n_range)
+     x = vectorizer.fit_transform([p.title for p in pub_list])
+     y = x.T.tocsr()
+     jc_matrix = jit_square_factors(
+         x.indices, x.indptr, y.indices, y.indptr, len(pub_list), length_impact
+     )
+     done = np.zeros(len(pub_list), dtype=bool)
+     for i, paper in enumerate(pub_list):
+         if done[i]:
+             continue
+         locs = np.where(jc_matrix[i, :] > threshold)[0]
+         pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
+         res[pub.key] = pub
+         done[locs] = True
      return res
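
The rewrite above trades per-title Process.transform calls for a single pairwise title-similarity matrix (jit_square_factors), after which the grouping pass is a simple greedy sweep. That sweep in isolation, sketched with a plain NumPy matrix standing in for the jit_square_factors output:

import numpy as np

def greedy_group(sim, threshold=85):
    # Each unassigned row i seeds a group containing every column whose
    # similarity exceeds the threshold, mirroring the done/locs bookkeeping
    # in regroup_publications above.
    done = np.zeros(sim.shape[0], dtype=bool)
    groups = []
    for i in range(sim.shape[0]):
        if done[i]:
            continue
        locs = np.where(sim[i, :] > threshold)[0]
        groups.append(locs.tolist())
        done[locs] = True
    return groups

sim = np.array([[100, 90, 10],
                [90, 100, 5],
                [10, 5, 100]])
print(greedy_group(sim))  # [[0, 1], [2]] -- items 0 and 1 are merged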
gismap/utils/common.py CHANGED
@@ -30,7 +30,7 @@ def unlist(x):
      return x[0] if (isinstance(x, list) and x) else x
 
 
- def get_classes(root, key="name"):
+ def get_classes(root, key="name", recurse=False):
      """
      Parameters
      ----------
@@ -38,6 +38,8 @@ def get_classes(root, key="name"):
          Starting class (can be abstract).
      key: :class:`str`, default='name'
          Attribute to look-up
+     recurse: bool, default=False
+         Recursively traverse subclasses.
 
      Returns
      -------
@@ -50,13 +52,16 @@ def get_classes(root, key="name"):
      >>> from gismap.sources.models import DB
      >>> subclasses = get_classes(DB, key='db_name')
      >>> dict(sorted(subclasses.items())) # doctest: +NORMALIZE_WHITESPACE
-     {'dblp': <class 'gismap.sources.dblp.DBLP'>, 'hal': <class 'gismap.sources.hal.HAL'>}
+     {'dblp': <class 'gismap.sources.dblp.DBLP'>,
+      'hal': <class 'gismap.sources.hal.HAL'>,
+      'ldb': <class 'gismap.sources.ldb.LDB'>}
      """
      result = {
          getattr(c, key): c for c in root.__subclasses__() if getattr(c, key, None)
      }
-     for c in root.__subclasses__():
-         result.update(get_classes(c))
+     if recurse:
+         for c in root.__subclasses__():
+             result.update(get_classes(c, key=key, recurse=True))
      return result
 
 
@@ -83,20 +88,20 @@ def list_of_objects(clss, dico, default=None):
 
      >>> from gismap.sources.models import DB
      >>> subclasses = get_classes(DB, key='db_name')
-     >>> from gismap import HAL, DBLP
-     >>> list_of_objects([HAL, 'dblp'], subclasses)
-     [<class 'gismap.sources.hal.HAL'>, <class 'gismap.sources.dblp.DBLP'>]
+     >>> from gismap import HAL, DBLP, LDB
+     >>> list_of_objects([HAL, 'ldb'], subclasses)
+     [<class 'gismap.sources.hal.HAL'>, <class 'gismap.sources.ldb.LDB'>]
      >>> list_of_objects(None, subclasses, [DBLP])
      [<class 'gismap.sources.dblp.DBLP'>]
-     >>> list_of_objects(DBLP, subclasses)
-     [<class 'gismap.sources.dblp.DBLP'>]
+     >>> list_of_objects(LDB, subclasses)
+     [<class 'gismap.sources.ldb.LDB'>]
      >>> list_of_objects('hal', subclasses)
      [<class 'gismap.sources.hal.HAL'>]
      """
      if default is None:
          default = []
      if clss is None:
-         return default
+         return list_of_objects(clss=default, dico=dico)
      elif isinstance(clss, str):
          return [dico[clss]]
      elif isinstance(clss, list):
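
The new recurse flag makes subclass traversal opt-in and also fixes the old recursion, which dropped the key argument. A toy illustration with hypothetical classes (not from gismap):

class Base:
    name = None

class A(Base):
    name = "a"

class B(A):
    name = "b"

# Default: direct subclasses only.
# get_classes(Base)               -> {'a': A}
# Opt-in recursion also reaches B, forwarding key at each level.
# get_classes(Base, recurse=True) -> {'a': A, 'b': B}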
gismap/utils/logger.py CHANGED
@@ -2,3 +2,5 @@ import logging
 
  logger = logging.getLogger("GisMap")
  """Default logging interface."""
+
+ logger.setLevel(logging.INFO)
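
With the library logger now set to INFO by default, applications that want quieter output can raise the level through the standard logging API (a sketch, not part of the diff):

import logging

logging.getLogger("GisMap").setLevel(logging.WARNING)  # hide INFO messages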
gismap/utils/requests.py CHANGED
@@ -13,7 +13,7 @@ session.headers.update(
  )
 
 
- def get(url, params=None, n_trials=10):
+ def get(url, params=None, n_trials=10, verify=True):
      """
      Parameters
      ----------
@@ -21,6 +21,10 @@ def get(url, params=None, n_trials=10):
          Entry point to fetch.
      params: :class:`dict`, optional
          Get arguments (appended to URL).
+     n_trials: :class:`int`, default=10
+         Number of attempts to fetch URL.
+     verify: :class:`bool`, default=True
+         Verify certificates.
 
      Returns
      -------
@@ -29,7 +33,7 @@ def get(url, params=None, n_trials=10):
      """
      for attempt in range(n_trials):
          try:
-             r = session.get(url, params=params)
+             r = session.get(url, params=params, verify=verify)
              if r.status_code == 429:
                  try:
                      t = int(r.headers["Retry-After"])