gismap 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/sources/ldb.py ADDED
@@ -0,0 +1,716 @@
+ from dataclasses import dataclass, field
+ from functools import lru_cache
+ from typing import ClassVar
+ from platformdirs import user_data_dir
+ from pathlib import Path
+ from datetime import datetime, timezone
+ import errno
+ import json
+ import os
+
+ import zstandard as zstd
+ import dill as pickle
+ import numpy as np
+ import numba as nb
+ from bof.fuzz import Process
+ from gismo.common import safe_write
+ from tqdm.auto import tqdm
+ import requests
+
+ from gismap.sources.dblp_ttl import publis_streamer
+ from gismap.sources.models import DB, Author, Publication
+ from gismap.utils.common import Data
+ from gismap.utils.logger import logger
+ from gismap.utils.text import normalized_name
+ from gismap.utils.zlist import ZList
+
+
+ DATA_DIR = Path(
+     user_data_dir(
+         appname="gismap",
+         appauthor=False,
+     )
+ )
+ LDB_STEM = "ldb"
+ GITHUB_REPO = "balouf/gismap"
+
+ LDB_PARAMETERS = Data(
+     {
+         "search": {"limit": 3, "cutoff": 87.0, "slack": 1.0},
+         "bof": {"n_range": 2, "length_impact": 0.1},
+         "frame_size": {"authors": 512, "publis": 256},
+         "io": {
+             "source": "https://dblp.org/rdf/dblp.ttl.gz",
+             "destination": DATA_DIR / f"{LDB_STEM}.pkl.zst",
+             "metadata": DATA_DIR / f"{LDB_STEM}.json",
+             "gh_api": f"https://api.github.com/repos/{GITHUB_REPO}/releases",
+         },
+     }
+ )
+ """
51
+ Global configuration parameters for the Local DBLP (LDB) pipeline.
52
+
53
+ Structure:
54
+ - search:
55
+ - limit: maximum number of candidates retrieved per query.
56
+ - cutoff: minimal similarity score required to keep a candidate.
57
+ - slack: tolerance around the cutoff for borderline matches.
58
+ - bof (Bag-of-Factors):
59
+ - n_range: max factor size (higher is better but more expensive).
60
+ - length_impact: how to compare two inputs of different size.
61
+ - frame_size:
62
+ - authors: maximum number of authors kept in a single frame/batch.
63
+ - publis: maximum number of publications kept in a single frame/batch.
64
+ - io:
65
+ - source: URL/file location of the DBLP RDF dump used as raw input.
66
+ - destination: local path where the compressed preprocessed dataset is / will be stored.
67
+ - gh_api: GitHub API endpoint used to fetch release information for the project.
68
+
69
+ LDB_PARAMETERS is a Data (RecursiveDict) instance, so nested fields can be
70
+ accessed with attribute notation, e.g.:
71
+ LDB_PARAMETERS.search.limit
72
+ LDB_PARAMETERS.io.destination
73
+ """
74
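+ # Editor's sketch (illustrative, not part of the original module): per the
+ # docstring above, parameters can be read and tuned with attribute notation
+ # before any LDB call, e.g.
+ #
+ #     from gismap.sources.ldb import LDB_PARAMETERS
+ #     LDB_PARAMETERS.search.limit              # 3 by default
+ #     LDB_PARAMETERS.bof.length_impact = 0.2   # soften the length penalty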
+
+
+ @dataclass(repr=False)
+ class LDB(DB):
+     """
+     Browse DBLP from a local copy of the database.
+
+     LDB is a class-only database: it should not be instantiated.
+     All methods are classmethods and state is stored in class variables.
+
+     Examples
+     --------
+
+     Public DB methods ensure that the DB is loaded, but if you need to use a specific LDB method, prepare the DB first.
+
+     >>> LDB._ensure_loaded()
+     >>> LDB.author_by_key("66/2077")
+     LDBAuthor(name='Fabien Mathieu', key='66/2077')
+     >>> pubs = sorted(LDB.author_publications('66/2077'), key=lambda p: p.year)
+     >>> pub = pubs[0]
+     >>> pub.metadata
+     {'url': 'http://www2003.org/cdrom/papers/poster/p102/p102-mathieu.htm', 'streams': ['conf/www']}
+     >>> LDB.db_info()  # doctest: +ELLIPSIS
+     {'tag': 'v0.4.0', 'downloaded_at': '2026-...', 'size': ..., 'path': ...}
+     >>> LDB.check_update()
+     >>> ldb = LDB()
+     Traceback (most recent call last):
+     ...
+     TypeError: LDB should not be instantiated. Use class methods directly, e.g., LDB.search_author(name)
+     """
+
+     db_name: ClassVar[str] = LDB_STEM
+     parameters: ClassVar[Data] = LDB_PARAMETERS
+
+     # Class-level state (replaces instance attributes)
+     authors: ClassVar[ZList | None] = None
+     publis: ClassVar[ZList | None] = None
+     keys: ClassVar[dict | None] = None
+     search_engine: ClassVar[Process | None] = None
+     _initialized: ClassVar[bool] = False
+
+     __hash__ = object.__hash__
+
+     def __init__(self):
+         raise TypeError(
+             "LDB should not be instantiated. Use class methods directly, e.g., LDB.search_author(name)"
+         )
+
+     @classmethod
+     def _ensure_loaded(cls):
+         """Lazy-load the database if not already loaded."""
+         if cls._initialized:
+             return
+         if not cls.parameters.io.destination.exists():
+             logger.info("LDB not found locally. Attempting to retrieve from GitHub...")
+             try:
+                 cls.retrieve()
+             except RuntimeError as e:
+                 logger.warning(f"Could not auto-retrieve LDB: {e}")
+         cls.load_db()
+
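+     # Editor's note (illustrative, not in the original file): every public
+     # entry point calls _ensure_loaded() first, so on a fresh machine
+     #     LDB.search_author("Fabien Mathieu")
+     # is enough to trigger the GitHub download, with a source build as fallback.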
+     @classmethod
+     def build_db(cls, limit=None):
+         """
+         Build the LDB database from a DBLP TTL dump.
+
+         Parses the DBLP RDF/TTL file to extract publications and authors,
+         stores them in compressed ZList structures, and builds a fuzzy
+         search engine for author name lookups.
+
+         Parameters
+         ----------
+         limit: :class:`int`, optional
+             Maximum number of publications to process. If None, processes
+             the entire database. Useful for testing with a subset.
+
+         Notes
+         -----
+         This method populates the class-level attributes:
+
+         - ``authors``: ZList of (key, name, publication_indices) tuples
+         - ``publis``: ZList of publication records
+         - ``keys``: dict mapping author keys to indices
+         - ``search_engine``: fuzzy search Process for author lookups
+
+         After building, call :meth:`dump_db` to persist the database.
+
+         Examples
+         --------
+         Build from the default DBLP source:
+
+         >>> LDB.build_db()  # doctest: +SKIP
+         >>> LDB.dump_db()  # doctest: +SKIP
+
+         Build a small test database:
+
+         >>> LDB.build_db(limit=1000)
+         >>> LDB.authors[0]
+         ('78/459-1', 'Manish Singh', [0])
+
+         Save your build in a non-default file:
+
+         >>> from tempfile import TemporaryDirectory
+         >>> from pathlib import Path
+         >>> with TemporaryDirectory() as tmpdirname:
+         ...     LDB.dump(filename="test.zst", path=tmpdirname)
+         ...     [file.name for file in Path(tmpdirname).glob("*")]
+         ['test.zst']
+
+         In case you don't like your build and want to reload your local database from disk:
+
+         >>> LDB.load_db()
+         """
+         source = cls.parameters.io.source
+         authors_dict = dict()
+         logger.info("Retrieve publications")
+         with ZList(frame_size=cls.parameters.frame_size.publis) as publis:
+             for i, (
+                 key,
+                 title,
+                 typ,
+                 authors,
+                 url,
+                 streams,
+                 pages,
+                 venue,
+                 year,
+             ) in enumerate(publis_streamer(source)):
+                 auth_indices = []
+                 for auth_key, auth_name in authors.items():
+                     if auth_key not in authors_dict:
+                         authors_dict[auth_key] = (len(authors_dict), auth_name, [i])
+                     else:
+                         authors_dict[auth_key][2].append(i)
+                     auth_indices.append(authors_dict[auth_key][0])
+                 publis.append(
+                     (key, title, typ, auth_indices, url, streams, pages, venue, year)
+                 )
+                 if i == limit:
+                     break
+         cls.publis = publis
+         logger.info(f"{len(publis)} publications retrieved.")
+         logger.info("Compact authors")
+         with ZList(frame_size=cls.parameters.frame_size.authors) as authors:
+             for key, (_, name, pubs) in tqdm(authors_dict.items()):
+                 authors.append((key, name, pubs))
+         cls.authors = authors
+         cls.keys = {k: v[0] for k, v in authors_dict.items()}
+         del authors_dict
+         cls._build_search_engine()
+         cls._invalidate_cache()
+         cls._initialized = True
+
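+     # Editor's sketch of the record layouts built above (not in the original
+     # file; inferred directly from the code):
+     #     publis[i]  == (key, title, type, [author_indices], url, streams, pages, venue, year)
+     #     authors[j] == (author_key, name, [publication_indices])
+     #     keys       == {author_key: j}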
+     @classmethod
+     def _build_search_engine(cls):
+         cls.search_engine = Process(
+             n_range=cls.parameters.bof.n_range,
+             length_impact=cls.parameters.bof.length_impact,
+         )
+         cls.search_engine.fit([normalized_name(a[1]) for a in cls.authors])
+         cls.search_engine.choices = np.arange(len(cls.authors))
+         cls.search_engine.vectorizer.features_ = cls.numbify_dict(
+             cls.search_engine.vectorizer.features_
+         )
+         logger.info(f"{len(cls.authors)} authors indexed.")
+
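+     # Editor's note (illustrative): the engine indexes normalized author
+     # names; judging from how search_author() consumes the result, a raw
+     # query such as
+     #     LDB.search_engine.extract(normalized_name("F. Mathieu"), limit=3)
+     # yields (author_index, score) pairs that search_author() then filters.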
+     @classmethod
+     @lru_cache(maxsize=50000)
+     def author_by_index(cls, i):
+         key, name, _ = cls.authors[i]
+         return LDBAuthor(key=key, name=name)
+
+     @classmethod
+     def author_by_key(cls, key):
+         return cls.author_by_index(cls.keys[key])
+
+     @classmethod
+     @lru_cache(maxsize=50000)
+     def publication_by_index(cls, i):
+         key, title, typ, authors, url, streams, pages, venue, year = cls.publis[i]
+         if venue is None:
+             venue = "unpublished"
+         return {
+             "key": key,
+             "title": title,
+             "type": typ,
+             "authors": authors,
+             "url": url,
+             "streams": streams,
+             "pages": pages,
+             "venue": venue,
+             "year": year,
+         }
+
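+     # Editor's sketch (illustrative): index- and key-based lookups compose:
+     #     LDB._ensure_loaded()
+     #     LDB.author_by_key("66/2077")   # LDBAuthor(name='Fabien Mathieu', key='66/2077')
+     #     LDB.publication_by_index(0)    # plain dict record, venue defaults to 'unpublished'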
+     @classmethod
+     def author_publications(cls, key):
+         cls._ensure_loaded()
+         _, name, pubs = cls.authors[cls.keys[key]]
+         pubs = [cls.publication_by_index(k).copy() for k in pubs]
+         auth_ids = sorted({k for p in pubs for k in p["authors"]})
+         auths = {k: cls.author_by_index(k) for k in auth_ids}
+         for pub in pubs:
+             pub["authors"] = [auths[k] for k in pub["authors"]]
+             metadata = dict()
+             for k in ["url", "streams", "pages"]:
+                 v = pub.pop(k)
+                 if v is not None:
+                     metadata[k] = v
+             pub["metadata"] = metadata
+         return [LDBPublication(**pub) for pub in pubs]
+
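+     # Editor's sketch (illustrative): author_publications() returns
+     # LDBPublication objects whose non-empty url/streams/pages fields are
+     # folded into .metadata, e.g.
+     #     pubs = LDB.author_publications("66/2077")
+     #     pubs[0].metadata   # e.g. {'url': ..., 'streams': ['conf/www']}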
+     @classmethod
+     @lru_cache(maxsize=1000)
+     def search_author(cls, name):
+         cls._ensure_loaded()
+         res = cls.search_engine.extract(
+             normalized_name(name),
+             limit=cls.parameters.search.limit,
+         )
+         if not res:
+             return []
+         target = max(
+             cls.parameters.search.cutoff, res[0][1] - cls.parameters.search.slack
+         )
+         res = [r[0] for r in res if r[1] > target]
+         sorted_ids = {i: cls.author_by_index(i) for i in sorted(res)}
+         return [sorted_ids[i] for i in res]
+
+     @classmethod
+     def _invalidate_cache(cls):
+         cls.search_author.cache_clear()
+         cls.publication_by_index.cache_clear()
+         cls.author_by_index.cache_clear()
+
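+     # Editor's worked example of the filtering rule above: with the default
+     # cutoff=87 and slack=1, a best score of 95 keeps candidates scoring
+     # strictly above max(87, 95 - 1) = 94, while a best score of 86 keeps
+     # nothing (86 is not above max(87, 85) = 87).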
+     @classmethod
+     def from_author(cls, a):
+         return cls.author_publications(a.key)
+
+     @classmethod
+     def _get_release_info(cls, tag: str | None = None) -> dict:
+         """
+         Fetch release metadata from GitHub API.
+
+         Parameters
+         ----------
+         tag: :class:`str`, optional
+             Specific release tag (e.g., "v0.4.0"). If None, fetches latest.
+
+         Returns
+         -------
+         :class:`dict`
+             Release metadata including tag_name and assets.
+
+         Raises
+         ------
+         :class:`RuntimeError`
+             If release not found or API request fails.
+         """
+         api_url = cls.parameters.io.gh_api
+         if tag is None:
+             url = f"{api_url}/latest"
+         else:
+             url = f"{api_url}/tags/{tag}"
+
+         try:
+             response = requests.get(url, timeout=30)
+             response.raise_for_status()
+             return response.json()
+         except requests.exceptions.HTTPError as e:
+             if response.status_code == 404:
+                 raise RuntimeError(f"Release not found: {tag or 'latest'}") from e
+             raise RuntimeError(f"GitHub API error: {e}") from e
+         except requests.exceptions.RequestException as e:
+             raise RuntimeError(f"Network error fetching release info: {e}") from e
+
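+     # Editor's note (illustrative): only a few fields of the GitHub release
+     # payload are used downstream, e.g.
+     #     info = LDB._get_release_info("v0.4.0")
+     #     info["tag_name"]                      # release tag
+     #     [a["name"] for a in info["assets"]]   # downloadable assets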
+     @classmethod
+     def _download_file(cls, url: str, dest: Path, desc: str = "Downloading"):
+         """
+         Download file with progress bar.
+
+         Parameters
+         ----------
+         url: :class:`str`
+             URL to download from.
+         dest: :class:`Path`
+             Destination file path.
+         desc: :class:`str`
+             Description for progress bar.
+         """
+         dest.parent.mkdir(parents=True, exist_ok=True)
+
+         response = requests.get(url, stream=True, timeout=30)
+         response.raise_for_status()
+
+         total_size = int(response.headers.get("content-length", 0))
+
+         with (
+             open(dest, "wb") as f,
+             tqdm(
+                 desc=desc,
+                 total=total_size,
+                 unit="B",
+                 unit_scale=True,
+                 unit_divisor=1024,
+             ) as pbar,
+         ):
+             for chunk in response.iter_content(chunk_size=8192):
+                 if chunk:
+                     f.write(chunk)
+                     pbar.update(len(chunk))
+
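+     # Editor's note (illustrative): the download is streamed in 8 KiB chunks,
+     # so even a multi-GB asset never has to fit in memory; tqdm takes the
+     # expected total from the Content-Length header when present.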
+     @classmethod
+     def _save_meta(cls, tag: str, url: str, size: int):
+         """Save version metadata to JSON file."""
+         meta = {
+             "tag": tag,
+             "url": url,
+             "size": size,
+             "downloaded_at": datetime.now(timezone.utc).isoformat(),
+         }
+         meta_path = cls.parameters.io.metadata
+         meta_path.parent.mkdir(parents=True, exist_ok=True)
+         with open(meta_path, "w") as f:
+             json.dump(meta, f, indent=2)
+
+     @classmethod
+     def _load_meta(cls) -> dict | None:
+         """Load version metadata from JSON file."""
+         meta_path = cls.parameters.io.metadata
+         if not meta_path.exists():
+             return None
+         try:
+             with open(meta_path, "r") as f:
+                 return json.load(f)
+         except (json.JSONDecodeError, IOError):
+             return None
+
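+     # Editor's sketch of the sidecar file written above (ldb.json); values
+     # are illustrative, keys match _save_meta():
+     #     {
+     #       "tag": "v0.4.0",
+     #       "url": "<browser_download_url of the asset>",
+     #       "size": 1234567890,
+     #       "downloaded_at": "2026-01-01T00:00:00+00:00"
+     #     }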
+     @classmethod
+     def retrieve(cls, version: str | None = None, force: bool = False):
+         """
+         Download LDB database from GitHub releases.
+
+         Parameters
+         ----------
+         version: :class:`str`, optional
+             Specific release version (e.g., "v0.4.0" or "0.4.0").
+             If None, downloads from latest release.
+         force: :class:`bool`, default=False
+             Download even if same version is installed.
+
+         Raises
+         ------
+         :class:`RuntimeError`
+             If release or asset not found, or download fails.
+
+         Examples
+         --------
+
+         The following will get you an LDB if you do not have one.
+
+         >>> LDB.retrieve()  # Latest release (freshest data)
+         >>> LDB.retrieve("v0.4.0")  # Specific version
+         >>> LDB.retrieve("0.4.0")  # Also works without 'v' prefix
+
+         Of course, the tag/version must be LDB-ready.
+
+         >>> LDB.retrieve("v0.3.0")  # Too old for LDB
+         Traceback (most recent call last):
+         ...
+         RuntimeError: Asset 'ldb.pkl.zst' not found in release v0.3.0. Available assets: []
+         """
+         # Normalize version string (add "v" prefix if missing)
+         tag = None
+         if version is not None:
+             tag = version if version.startswith("v") else f"v{version}"
+
+         # Fetch release info
+         logger.info(f"Fetching release info for: {tag or 'latest'}")
+         release_info = cls._get_release_info(tag)
+         release_tag = release_info["tag_name"]
+
+         destination = cls.parameters.io.destination
+
+         # Check if already installed (unless force=True)
+         if not force:
+             meta = cls._load_meta()
+             if meta and meta.get("tag") == release_tag and destination.exists():
+                 logger.info(
+                     f"LDB version {release_tag} already installed. Use force=True to re-download."
+                 )
+                 return
+
+         # Find ldb.pkl.zst asset in release
+         assets = release_info.get("assets", [])
+         ldb_asset = None
+         for asset in assets:
+             if asset["name"] == destination.name:
+                 ldb_asset = asset
+                 break
+
+         if ldb_asset is None:
+             raise RuntimeError(
+                 f"Asset '{destination.name}' not found in release {release_tag}. "
+                 f"Available assets: {[a['name'] for a in assets]}"
+             )
+
+         download_url = ldb_asset["browser_download_url"]
+         asset_size = ldb_asset["size"]
+
+         logger.info(
+             f"Downloading LDB from release {release_tag} ({asset_size / 1e9:.2f} GB)"
+         )
+
+         # Download with progress bar
+         cls._download_file(download_url, destination, desc=f"LDB {release_tag}")
+
+         # Save version metadata
+         cls._save_meta(release_tag, download_url, asset_size)
+
+         # Load database and rebuild search engine locally
+         cls.load_db(restore_search=True)
+
+         logger.info(f"LDB {release_tag} successfully installed to {destination}")
+
+     @classmethod
+     def db_info(cls) -> dict | None:
+         """
+         Return installed version info.
+
+         Returns
+         -------
+         :class:`dict` or :class:`None`
+             Dictionary with tag, date, size, path; or None if not installed.
+         """
+         meta = cls._load_meta()
+         destination = cls.parameters.io.destination
+         if meta is None or not destination.exists():
+             return None
+
+         return {
+             "tag": meta.get("tag"),
+             "downloaded_at": meta.get("downloaded_at"),
+             "size": meta.get("size"),
+             "path": str(destination),
+         }
+
+     @classmethod
+     def check_update(cls) -> dict | None:
+         """
+         Check if a newer version is available on GitHub.
+
+         Returns
+         -------
+         :class:`dict` or :class:`None`
+             Dictionary with update info if available, None if up to date.
+         """
+         try:
+             release_info = cls._get_release_info()
+             latest_tag = release_info["tag_name"]
+
+             meta = cls._load_meta()
+             current_tag = meta.get("tag") if meta else None
+
+             if current_tag == latest_tag:
+                 logger.info(f"LDB is up to date: {current_tag}")
+                 return None
+
+             return {
+                 "current": current_tag,
+                 "latest": latest_tag,
+                 "message": f"Update available: {current_tag or 'not installed'} -> {latest_tag}",
+             }
+         except RuntimeError as e:
+             logger.warning(f"Could not check for updates: {e}")
+             return None
+
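+     # Editor's sketch (illustrative) of a typical update flow with the two
+     # helpers above:
+     #     upd = LDB.check_update()
+     #     if upd:                          # None means up to date (or check failed)
+     #         LDB.retrieve(upd["latest"])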
+     @classmethod
+     def dump(cls, filename: str, path=".", overwrite=False, include_search=True):
+         """Save class state to file."""
+         # Convert numba dict to regular dict for pickling
+         nb_dict = None
+         if include_search and cls.search_engine is not None:
+             nb_dict = cls.search_engine.vectorizer.features_
+             cls.search_engine.vectorizer.features_ = dict(nb_dict)
+
+         state = {
+             "authors": cls.authors,
+             "publis": cls.publis,
+             "keys": cls.keys,
+             "search_engine": cls.search_engine if include_search else None,
+         }
+
+         # Use safe_write pattern from gismo.common
+         destination = Path(path) / filename
+         if destination.exists() and not overwrite:
+             print(
+                 f"File {destination} already exists! Use overwrite option to overwrite."
+             )
+         else:
+             with safe_write(destination) as f:
+                 cctx = zstd.ZstdCompressor(level=3)
+                 with cctx.stream_writer(f) as z:
+                     pickle.dump(state, z, protocol=5)
+
+         # Restore numba dict
+         if include_search and cls.search_engine is not None:
+             cls.search_engine.vectorizer.features_ = nb_dict
+
+     @classmethod
+     def load(cls, filename: str, path=".", restore_search=False):
+         """Load class state from file."""
+         dest = Path(path) / filename
+         if not dest.exists():
+             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), dest)
+
+         dctx = zstd.ZstdDecompressor()
+         with open(dest, "rb") as f, dctx.stream_reader(f) as z:
+             state = pickle.load(z)
+
+         cls.authors = state["authors"]
+         cls.publis = state["publis"]
+         cls.keys = state["keys"]
+         cls.search_engine = state["search_engine"]
+
+         if restore_search:
+             cls._build_search_engine()
+             cls.dump(filename=filename, path=path, overwrite=True, include_search=True)
+         elif cls.search_engine is not None:
+             cls.search_engine.vectorizer.features_ = cls.numbify_dict(
+                 cls.search_engine.vectorizer.features_
+             )
+
+         cls._invalidate_cache()
+         cls._initialized = True
+
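+     # Editor's note (illustrative): dump()/load() round-trip class state
+     # through a zstd-compressed dill pickle; the numba typed dict is swapped
+     # for a plain dict around pickling and rebuilt on load, e.g.
+     #     LDB.dump("ldb.pkl.zst", path="/tmp", overwrite=True)  # hypothetical path
+     #     LDB.load("ldb.pkl.zst", path="/tmp")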
+     @classmethod
+     def dump_db(cls, include_search=True):
+         destination = cls.parameters.io.destination
+         destination.parent.mkdir(parents=True, exist_ok=True)
+         cls.dump(
+             destination.name,
+             path=destination.parent,
+             overwrite=True,
+             include_search=include_search,
+         )
+
+     @classmethod
+     def load_db(cls, restore_search=False):
+         destination = cls.parameters.io.destination
+         try:
+             cls.load(
+                 destination.name, path=destination.parent, restore_search=restore_search
+             )
+         except FileNotFoundError:
+             logger.warning("No LDB found. Building from source...")
+             cls.build_db()
+             cls.dump_db()
+         except TypeError as e:
+             if "code expected at most" in str(e):
+                 logger.warning(
+                     "LDB file incompatible with this Python version. Rebuilding from source..."
+                 )
+                 cls.build_db()
+                 cls.dump_db()
+             else:
+                 raise
+
+     @classmethod
+     def delete_db(cls):
+         destination = cls.parameters.io.destination
+         if destination.exists():
+             destination.unlink()
+
+     @staticmethod
+     def numbify_dict(input_dict):
+         nb_dict = nb.typed.Dict.empty(
+             key_type=nb.types.unicode_type, value_type=nb.types.int64
+         )
+         for k, v in input_dict.items():
+             nb_dict[k] = v
+         return nb_dict
+
+
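+ # Editor's note (illustrative): numbify_dict() converts a plain mapping into
+ # a numba typed dict, presumably so bof's jitted routines can query it, e.g.
+ #     d = LDB.numbify_dict({"feat": 3})   # nb.typed.Dict[unicode_type, int64]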
+ @dataclass(repr=False)
+ class LDBAuthor(Author, LDB):
+     """
+     Author from the LDB (Local DBLP) database.
+
+     LDB provides local access to DBLP data without rate limiting.
+
+     Parameters
+     ----------
+     name: :class:`str`
+         The author's name.
+     key: :class:`str`
+         DBLP person identifier (pid).
+     aliases: :class:`list`
+         Alternative names for the author.
+     """
+
+     key: str
+     aliases: list = field(default_factory=list)
+
+     @property
+     def url(self):
+         return f"https://dblp.org/pid/{self.key}.html"
+
+     def get_publications(self):
+         return LDB.from_author(self)
+
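+ # Editor's sketch (illustrative): LDBAuthor is a thin record over the class
+ # store, e.g.
+ #     a = LDB.author_by_key("66/2077")
+ #     a.url                  # 'https://dblp.org/pid/66/2077.html'
+ #     a.get_publications()   # same list as LDB.author_publications(a.key)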
+
+ @dataclass(repr=False)
+ class LDBPublication(Publication, LDB):
+     """
+     Publication from the LDB (Local DBLP) database.
+
+     Parameters
+     ----------
+     title: :class:`str`
+         Publication title.
+     authors: :class:`list`
+         List of :class:`LDBAuthor` objects.
+     venue: :class:`str`
+         Publication venue.
+     type: :class:`str`
+         Publication type.
+     year: :class:`int`
+         Publication year.
+     key: :class:`str`
+         DBLP record key.
+     metadata: :class:`dict`
+         Additional metadata (URL, streams, pages).
+     """
+
+     key: str
+     metadata: dict = field(default_factory=dict)
+
+     @property
+     def url(self):
+         return self.metadata.get("url", f"https://dblp.org/rec/{self.key}.html")
+
+     @property
+     def stream(self):
+         if "streams" in self.metadata:
+             return f"https://dblp.org/streams/{self.metadata['streams'][0]}"
+         return None
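+ # Editor's note (illustrative): LDBPublication.url falls back to the DBLP
+ # record page when metadata carries no explicit URL, and stream resolves the
+ # first listed DBLP stream (or None), e.g.
+ #     pub = LDB.author_publications("66/2077")[0]
+ #     pub.url     # metadata URL, else f"https://dblp.org/rec/{pub.key}.html"
+ #     pub.stream  # a link like "https://dblp.org/streams/conf/www", or None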