gismap-0.4.0-py3-none-any.whl → gismap-0.4.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/sources/ldb.py CHANGED
@@ -19,27 +19,58 @@ import requests
 
 from gismap.sources.dblp_ttl import publis_streamer
 from gismap.sources.models import DB, Author, Publication
+from gismap.utils.common import Data
 from gismap.utils.logger import logger
-from gismap.utils.text import asciify
+from gismap.utils.text import normalized_name
 from gismap.utils.zlist import ZList
 
 
-DATA_DIR = Path(user_data_dir(
-    appname="gismap",
-    appauthor=False,
-))
-
+DATA_DIR = Path(
+    user_data_dir(
+        appname="gismap",
+        appauthor=False,
+    )
+)
 LDB_STEM = "ldb"
-
-LDB_PATH = DATA_DIR / f"{LDB_STEM}.pkl.zst"
-
-TTL_URL = "https://dblp.org/rdf/dblp.ttl.gz"
-
-# GitHub release asset constants
 GITHUB_REPO = "balouf/gismap"
-GITHUB_API_URL = f"https://api.github.com/repos/{GITHUB_REPO}/releases"
-LDB_ASSET_NAME = "ldb.pkl.zst"
-LDB_META_PATH = DATA_DIR / "ldb_meta.json"
+
+LDB_PARAMETERS = Data(
+    {
+        "search": {"limit": 3, "cutoff": 87.0, "slack": 1.0},
+        "bof": {"n_range": 2, "length_impact": 0.1},
+        "frame_size": {"authors": 512, "publis": 256},
+        "io": {
+            "source": "https://dblp.org/rdf/dblp.ttl.gz",
+            "destination": DATA_DIR / f"{LDB_STEM}.pkl.zst",
+            "metadata": DATA_DIR / f"{LDB_STEM}.json",
+            "gh_api": f"https://api.github.com/repos/{GITHUB_REPO}/releases",
+        },
+    }
+)
+"""
+Global configuration parameters for the Local DBLP (LDB) pipeline.
+
+Structure:
+- search:
+    - limit: maximum number of candidates retrieved per query.
+    - cutoff: minimal similarity score required to keep a candidate.
+    - slack: tolerance below the best score for keeping borderline matches.
+- bof (Bag-of-Factors):
+    - n_range: maximal factor size (higher is more accurate but more expensive).
+    - length_impact: how much a difference in length between two inputs lowers their similarity.
+- frame_size:
+    - authors: maximum number of authors kept in a single frame/batch.
+    - publis: maximum number of publications kept in a single frame/batch.
+- io:
+    - source: URL/file location of the DBLP RDF dump used as raw input.
+    - destination: local path where the compressed, preprocessed dataset is or will be stored.
+    - metadata: local path of the JSON file recording the installed release.
+    - gh_api: GitHub API endpoint used to fetch release information for the project.
+
+LDB_PARAMETERS is a Data (RecursiveDict) instance, so nested fields can be
+accessed with attribute notation, e.g.:
+    LDB_PARAMETERS.search.limit
+    LDB_PARAMETERS.io.destination
+"""
 
 
 @dataclass(repr=False)
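
Note on the new Data container: the docstring above relies on attribute-style access into nested dicts. A minimal sketch of that behavior, using a hypothetical AttrDict stand-in (the real Data/RecursiveDict class lives in gismap.utils.common and is not part of this diff):

    # Hypothetical stand-in for gismap.utils.common.Data, for illustration only:
    # a dict whose nested dicts are reachable with attribute notation.
    class AttrDict(dict):
        def __getattr__(self, name):
            value = self[name]
            return AttrDict(value) if isinstance(value, dict) else value

    params = AttrDict({"search": {"limit": 3, "cutoff": 87.0, "slack": 1.0}})
    assert params.search.limit == 3
    assert params.search.cutoff == 87.0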
@@ -49,9 +80,30 @@ class LDB(DB):
 
     LDB is a class-only database - it should not be instantiated.
     All methods are classmethods and state is stored in class variables.
+
+    Examples
+    --------
+
+    Public DB methods ensure that the DB is loaded, but if you need to use a specific LDB method directly, prepare the DB first.
+
+    >>> LDB._ensure_loaded()
+    >>> LDB.author_by_key("66/2077")
+    LDBAuthor(name='Fabien Mathieu', key='66/2077')
+    >>> pubs = sorted(LDB.author_publications('66/2077'), key=lambda p: p.year)
+    >>> pub = pubs[0]
+    >>> pub.metadata
+    {'url': 'http://www2003.org/cdrom/papers/poster/p102/p102-mathieu.htm', 'streams': ['conf/www']}
+    >>> LDB.db_info()  # doctest: +ELLIPSIS
+    {'tag': 'v0.4.0', 'downloaded_at': '2026-...', 'size': ..., 'path': ...}
+    >>> LDB.check_update()
+    >>> ldb = LDB()
+    Traceback (most recent call last):
+    ...
+    TypeError: LDB should not be instantiated. Use class methods directly, e.g., LDB.search_author(name)
     """
+
     db_name: ClassVar[str] = LDB_STEM
-    source: ClassVar[str] = TTL_URL
+    parameters: ClassVar[Data] = LDB_PARAMETERS
 
     # Class-level state (replaces instance attributes)
     authors: ClassVar[ZList | None] = None
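
Note on the class-only contract: the doctest above shows that LDB() raises TypeError. The guard itself is defined outside this hunk; a minimal sketch of one way such a guard can be implemented (ClassOnly is hypothetical):

    # Hypothetical sketch; the actual guard in gismap is not shown in this diff.
    class ClassOnly:
        def __init__(self):
            raise TypeError(
                f"{type(self).__name__} should not be instantiated. Use class "
                f"methods directly, e.g., {type(self).__name__}.search_author(name)"
            )

    try:
        ClassOnly()
    except TypeError as e:
        print(e)  # same shape as the Traceback in the Examples section above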
@@ -72,24 +124,81 @@ class LDB(DB):
         """Lazy-load the database if not already loaded."""
         if cls._initialized:
             return
-        if LDB_PATH.exists():
-            cls.load_db()
-        else:
+        if not cls.parameters.io.destination.exists():
             logger.info("LDB not found locally. Attempting to retrieve from GitHub...")
             try:
                 cls.retrieve()
-                cls.load_db()
             except RuntimeError as e:
                 logger.warning(f"Could not auto-retrieve LDB: {e}")
+        cls.load_db()
 
     @classmethod
-    def build_db(cls, source=None, limit=None, n_range=2, length_impact=.1, authors_frame=512, publis_frame=256):
-        if source is None:
-            source = cls.source
+    def build_db(cls, limit=None):
+        """
+        Build the LDB database from a DBLP TTL dump.
+
+        Parses the DBLP RDF/TTL file to extract publications and authors,
+        stores them in compressed ZList structures, and builds a fuzzy
+        search engine for author name lookups.
+
+        Parameters
+        ----------
+        limit: :class:`int`, optional
+            Maximum number of publications to process. If None, processes
+            the entire database. Useful for testing with a subset.
+
+        Notes
+        -----
+        This method populates the class-level attributes:
+
+        - ``authors``: ZList of (key, name, publication_indices) tuples
+        - ``publis``: ZList of publication records
+        - ``keys``: dict mapping author keys to indices
+        - ``search_engine``: fuzzy search Process for author lookups
+
+        After building, call :meth:`dump_db` to persist the database.
+
+        Examples
+        --------
+        Build from the default DBLP source:
+
+        >>> LDB.build_db()  # doctest: +SKIP
+        >>> LDB.dump_db()  # doctest: +SKIP
+
+        Build a small test database:
+
+        >>> LDB.build_db(limit=1000)
+        >>> LDB.authors[0]
+        ('78/459-1', 'Manish Singh', [0])
+
+        Save your build in a non-default file:
+
+        >>> from tempfile import TemporaryDirectory
+        >>> from pathlib import Path
+        >>> with TemporaryDirectory() as tmpdirname:
+        ...     LDB.dump(filename="test.zst", path=tmpdirname)
+        ...     [file.name for file in Path(tmpdirname).glob("*")]
+        ['test.zst']
+
+        In case you don't like your build and want to reload your local database from disk:
+
+        >>> LDB.load_db()
+        """
+        source = cls.parameters.io.source
         authors_dict = dict()
         logger.info("Retrieve publications")
-        with ZList(frame_size=publis_frame) as publis:
-            for i, (key, title, typ, authors, url, streams, pages, venue, year) in enumerate(publis_streamer(source)):
+        with ZList(frame_size=cls.parameters.frame_size.publis) as publis:
+            for i, (
+                key,
+                title,
+                typ,
+                authors,
+                url,
+                streams,
+                pages,
+                venue,
+                year,
+            ) in enumerate(publis_streamer(source)):
                 auth_indices = []
                 for auth_key, auth_name in authors.items():
                     if auth_key not in authors_dict:
@@ -97,26 +206,37 @@ class LDB(DB):
                     else:
                         authors_dict[auth_key][2].append(i)
                     auth_indices.append(authors_dict[auth_key][0])
-                publis.append((key, title, typ, auth_indices, url, streams, pages, venue, year))
+                publis.append(
+                    (key, title, typ, auth_indices, url, streams, pages, venue, year)
+                )
                 if i == limit:
                     break
         cls.publis = publis
         logger.info(f"{len(publis)} publications retrieved.")
         logger.info("Compact authors")
-        with ZList(frame_size=authors_frame) as authors:
+        with ZList(frame_size=cls.parameters.frame_size.authors) as authors:
             for key, (_, name, pubs) in tqdm(authors_dict.items()):
                 authors.append((key, name, pubs))
         cls.authors = authors
         cls.keys = {k: v[0] for k, v in authors_dict.items()}
         del authors_dict
-        cls.search_engine = Process(n_range=n_range, length_impact=length_impact)
-        cls.search_engine.fit([asciify(a[1]) for a in authors])
-        cls.search_engine.choices = np.arange(len(authors))
-        cls.search_engine.vectorizer.features_ = cls.numbify_dict(cls.search_engine.vectorizer.features_)
-        logger.info(f"{len(cls.authors)} compacted.")
+        cls._build_search_engine()
         cls._invalidate_cache()
         cls._initialized = True
 
+    @classmethod
+    def _build_search_engine(cls):
+        cls.search_engine = Process(
+            n_range=cls.parameters.bof.n_range,
+            length_impact=cls.parameters.bof.length_impact,
+        )
+        cls.search_engine.fit([normalized_name(a[1]) for a in cls.authors])
+        cls.search_engine.choices = np.arange(len(cls.authors))
+        cls.search_engine.vectorizer.features_ = cls.numbify_dict(
+            cls.search_engine.vectorizer.features_
+        )
+        logger.info(f"{len(cls.authors)} authors indexed.")
+
     @classmethod
     @lru_cache(maxsize=50000)
     def author_by_index(cls, i):
@@ -133,10 +253,17 @@ class LDB(DB):
         key, title, typ, authors, url, streams, pages, venue, year = cls.publis[i]
         if venue is None:
             venue = "unpublished"
-        return {"key": key, "title": title, "type": typ,
-                "authors": authors,
-                "url": url, "streams": streams, "pages": pages,
-                "venue": venue, "year": year}
+        return {
+            "key": key,
+            "title": title,
+            "type": typ,
+            "authors": authors,
+            "url": url,
+            "streams": streams,
+            "pages": pages,
+            "venue": venue,
+            "year": year,
+        }
 
     @classmethod
     def author_publications(cls, key):
@@ -157,10 +284,16 @@ class LDB(DB):
 
     @classmethod
    @lru_cache(maxsize=1000)
-    def search_author(cls, name, limit=2, score_cutoff=40.0, slack=10.0):
+    def search_author(cls, name):
         cls._ensure_loaded()
-        res = cls.search_engine.extract(asciify(name), limit=limit, score_cutoff=score_cutoff)
-        res = [r[0] for r in res if r[1] > res[0][1] - slack]
+        res = cls.search_engine.extract(
+            normalized_name(name),
+            limit=cls.parameters.search.limit,
+        )
+        if not res:
+            return []
+        target = max(cls.parameters.search.cutoff, res[0][1] - cls.parameters.search.slack)
+        res = [r[0] for r in res if r[1] > target]
         sorted_ids = {i: cls.author_by_index(i) for i in sorted(res)}
         return [sorted_ids[i] for i in res]
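
Note on the new matching rule: search_author no longer takes per-call limit/score_cutoff/slack arguments; they now come from LDB_PARAMETERS, and the absolute cutoff also bounds the slack window below the best score. A standalone sketch of the keep rule with the 0.4.1 defaults (keep_candidates is a hypothetical helper, not library code):

    def keep_candidates(scored, cutoff=87.0, slack=1.0):
        # scored: (index, score) pairs, best first, as returned by extract().
        if not scored:
            return []
        # The bar is the higher of the absolute cutoff and (best score - slack).
        target = max(cutoff, scored[0][1] - slack)
        return [index for index, score in scored if score > target]

    # Best score 95.0: bar = max(87.0, 94.0) = 94.0, so the 90.0 match is dropped.
    print(keep_candidates([(3, 95.0), (7, 94.5), (1, 90.0)]))  # [3, 7]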
@@ -194,10 +327,11 @@ class LDB(DB):
         :class:`RuntimeError`
             If release not found or API request fails.
         """
+        api_url = cls.parameters.io.gh_api
         if tag is None:
-            url = f"{GITHUB_API_URL}/latest"
+            url = f"{api_url}/latest"
         else:
-            url = f"{GITHUB_API_URL}/tags/{tag}"
+            url = f"{api_url}/tags/{tag}"
 
         try:
             response = requests.get(url, timeout=30)
@@ -229,15 +363,18 @@ class LDB(DB):
         response = requests.get(url, stream=True, timeout=30)
         response.raise_for_status()
 
-        total_size = int(response.headers.get('content-length', 0))
-
-        with open(dest, 'wb') as f, tqdm(
-            desc=desc,
-            total=total_size,
-            unit='B',
-            unit_scale=True,
-            unit_divisor=1024,
-        ) as pbar:
+        total_size = int(response.headers.get("content-length", 0))
+
+        with (
+            open(dest, "wb") as f,
+            tqdm(
+                desc=desc,
+                total=total_size,
+                unit="B",
+                unit_scale=True,
+                unit_divisor=1024,
+            ) as pbar,
+        ):
             for chunk in response.iter_content(chunk_size=8192):
                 if chunk:
                     f.write(chunk)
@@ -252,17 +389,19 @@ class LDB(DB):
             "size": size,
             "downloaded_at": datetime.now(timezone.utc).isoformat(),
         }
-        LDB_META_PATH.parent.mkdir(parents=True, exist_ok=True)
-        with open(LDB_META_PATH, 'w') as f:
+        meta_path = cls.parameters.io.metadata
+        meta_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(meta_path, "w") as f:
             json.dump(meta, f, indent=2)
 
     @classmethod
     def _load_meta(cls) -> dict | None:
         """Load version metadata from JSON file."""
-        if not LDB_META_PATH.exists():
+        meta_path = cls.parameters.io.metadata
+        if not meta_path.exists():
             return None
         try:
-            with open(LDB_META_PATH, 'r') as f:
+            with open(meta_path, "r") as f:
                 return json.load(f)
         except (json.JSONDecodeError, IOError):
             return None
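
Note on version bookkeeping: the metadata JSON above is what makes retrieve() idempotent, as the next hunk shows. A sketch of the check it performs before downloading (already_installed is a hypothetical helper; the sample tags are illustrative):

    # Skip the download when the requested release is already on disk.
    def already_installed(meta, release_tag, destination_exists):
        return bool(meta) and meta.get("tag") == release_tag and destination_exists

    print(already_installed({"tag": "v0.4.0"}, "v0.4.0", True))  # True: nothing to do
    print(already_installed({"tag": "v0.4.0"}, "v0.4.1", True))  # False: download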
@@ -282,9 +421,18 @@ class LDB(DB):
 
         Examples
         --------
-        >> LDB.retrieve() # Latest release (freshest data)
-        >> LDB.retrieve("v0.4.0") # Specific version
-        >> LDB.retrieve("0.4.0") # Also works without 'v' prefix
+
+        The following will get you an LDB if you do not have one.
+
+        >>> LDB.retrieve()  # Latest release (freshest data)
+        >>> LDB.retrieve("v0.4.0")  # Specific version
+        >>> LDB.retrieve("0.4.0")  # Also works without 'v' prefix
+
+        Of course, the tag/version must be LDB-ready.
+
+        >>> LDB.retrieve("v0.3.0")  # Too old for LDB
+        Traceback (most recent call last):
+        ...
+        RuntimeError: Asset 'ldb.pkl.zst' not found in release v0.3.0. Available assets: []
 
         Raises
         ------
@@ -301,43 +449,48 @@ class LDB(DB):
         release_info = cls._get_release_info(tag)
         release_tag = release_info["tag_name"]
 
+        destination = cls.parameters.io.destination
+
         # Check if already installed (unless force=True)
         if not force:
             meta = cls._load_meta()
-            if meta and meta.get("tag") == release_tag and LDB_PATH.exists():
-                logger.info(f"LDB version {release_tag} already installed. Use force=True to re-download.")
+            if meta and meta.get("tag") == release_tag and destination.exists():
+                logger.info(
+                    f"LDB version {release_tag} already installed. Use force=True to re-download."
+                )
                 return
 
         # Find ldb.pkl.zst asset in release
         assets = release_info.get("assets", [])
         ldb_asset = None
         for asset in assets:
-            if asset["name"] == LDB_ASSET_NAME:
+            if asset["name"] == destination.name:
                 ldb_asset = asset
                 break
 
         if ldb_asset is None:
             raise RuntimeError(
-                f"Asset '{LDB_ASSET_NAME}' not found in release {release_tag}. "
+                f"Asset '{destination.name}' not found in release {release_tag}. "
                 f"Available assets: {[a['name'] for a in assets]}"
             )
 
         download_url = ldb_asset["browser_download_url"]
         asset_size = ldb_asset["size"]
 
-        logger.info(f"Downloading LDB from release {release_tag} ({asset_size / 1e9:.2f} GB)")
+        logger.info(
+            f"Downloading LDB from release {release_tag} ({asset_size / 1e9:.2f} GB)"
+        )
 
         # Download with progress bar
-        cls._download_file(download_url, LDB_PATH, desc=f"LDB {release_tag}")
+        cls._download_file(download_url, destination, desc=f"LDB {release_tag}")
 
         # Save version metadata
         cls._save_meta(release_tag, download_url, asset_size)
 
-        # Reset initialized flag so next access reloads
-        cls._initialized = False
-        cls._invalidate_cache()
+        # Load database and rebuild search engine locally
+        cls.load_db(restore_search=True)
 
-        logger.info(f"LDB {release_tag} successfully installed to {LDB_PATH}")
+        logger.info(f"LDB {release_tag} successfully installed to {destination}")
 
     @classmethod
     def db_info(cls) -> dict | None:
@@ -350,14 +503,15 @@ class LDB(DB):
             Dictionary with tag, date, size, path; or None if not installed.
         """
         meta = cls._load_meta()
-        if meta is None or not LDB_PATH.exists():
+        destination = cls.parameters.io.destination
+        if meta is None or not destination.exists():
             return None
 
         return {
             "tag": meta.get("tag"),
             "downloaded_at": meta.get("downloaded_at"),
             "size": meta.get("size"),
-            "path": str(LDB_PATH),
+            "path": str(destination),
         }
 
     @classmethod
@@ -391,25 +545,27 @@ class LDB(DB):
             return None
 
     @classmethod
-    def dump(cls, filename: str, path=".", overwrite=False):
+    def dump(cls, filename: str, path=".", overwrite=False, include_search=True):
         """Save class state to file."""
         # Convert numba dict to regular dict for pickling
         nb_dict = None
-        if cls.search_engine is not None:
+        if include_search and cls.search_engine is not None:
             nb_dict = cls.search_engine.vectorizer.features_
             cls.search_engine.vectorizer.features_ = dict(nb_dict)
 
         state = {
-            'authors': cls.authors,
-            'publis': cls.publis,
-            'keys': cls.keys,
-            'search_engine': cls.search_engine,
+            "authors": cls.authors,
+            "publis": cls.publis,
+            "keys": cls.keys,
+            "search_engine": cls.search_engine if include_search else None,
         }
 
         # Use safe_write pattern from gismo.common
-        destination = Path(path) / f"{Path(filename).stem}.pkl.zst"
+        destination = Path(path) / filename
         if destination.exists() and not overwrite:
-            print(f"File {destination} already exists! Use overwrite option to overwrite.")
+            print(
+                f"File {destination} already exists! Use overwrite option to overwrite."
+            )
         else:
             with safe_write(destination) as f:
                 cctx = zstd.ZstdCompressor(level=3)
@@ -417,15 +573,13 @@ class LDB(DB):
                 with cctx.stream_writer(f) as z:
                     pickle.dump(state, z, protocol=5)
 
         # Restore numba dict
-        if cls.search_engine is not None:
+        if include_search and cls.search_engine is not None:
             cls.search_engine.vectorizer.features_ = nb_dict
 
     @classmethod
-    def load(cls, filename: str, path="."):
+    def load(cls, filename: str, path=".", restore_search=False):
         """Load class state from file."""
-        dest = Path(path) / f"{Path(filename).stem}.pkl.zst"
-        if not dest.exists():
-            dest = dest.with_suffix(".pkl")
+        dest = Path(path) / filename
         if not dest.exists():
             raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), dest)
 
@@ -433,12 +587,15 @@ class LDB(DB):
         with open(dest, "rb") as f, dctx.stream_reader(f) as z:
             state = pickle.load(z)
 
-        cls.authors = state['authors']
-        cls.publis = state['publis']
-        cls.keys = state['keys']
-        cls.search_engine = state['search_engine']
+        cls.authors = state["authors"]
+        cls.publis = state["publis"]
+        cls.keys = state["keys"]
+        cls.search_engine = state["search_engine"]
 
-        if cls.search_engine is not None:
+        if restore_search:
+            cls._build_search_engine()
+            cls.dump(filename=filename, path=path, overwrite=True, include_search=True)
+        elif cls.search_engine is not None:
             cls.search_engine.vectorizer.features_ = cls.numbify_dict(
                 cls.search_engine.vectorizer.features_
             )
@@ -447,25 +604,48 @@ class LDB(DB):
         cls._initialized = True
 
     @classmethod
-    def dump_db(cls):
-        DATA_DIR.mkdir(parents=True, exist_ok=True)
-        cls.dump(LDB_STEM, path=DATA_DIR, overwrite=True)
+    def dump_db(cls, include_search=True):
+        destination = cls.parameters.io.destination
+        destination.parent.mkdir(parents=True, exist_ok=True)
+        cls.dump(
+            destination.name,
+            path=destination.parent,
+            overwrite=True,
+            include_search=include_search,
+        )
 
     @classmethod
-    def load_db(cls):
+    def load_db(cls, restore_search=False):
+        destination = cls.parameters.io.destination
         try:
-            cls.load(LDB_STEM, path=DATA_DIR)
+            cls.load(
+                destination.name, path=destination.parent, restore_search=restore_search
+            )
         except FileNotFoundError:
-            logger.warning("No LDB installed. Build or retrieve before using.")
+            logger.warning("No LDB found. Building from source...")
+            cls.build_db()
+            cls.dump_db()
+        except TypeError as e:
+            if "code expected at most" in str(e):
+                logger.warning(
+                    "LDB file incompatible with this Python version. Rebuilding from source..."
+                )
+                cls.build_db()
+                cls.dump_db()
+            else:
+                raise
 
-    @staticmethod
-    def delete_db():
-        if LDB_PATH.exists():
-            LDB_PATH.unlink()
+    @classmethod
+    def delete_db(cls):
+        destination = cls.parameters.io.destination
+        if destination.exists():
+            destination.unlink()
 
     @staticmethod
     def numbify_dict(input_dict):
-        nb_dict = nb.typed.Dict.empty(key_type=nb.types.unicode_type, value_type=nb.types.int64)
+        nb_dict = nb.typed.Dict.empty(
+            key_type=nb.types.unicode_type, value_type=nb.types.int64
+        )
         for k, v in input_dict.items():
             nb_dict[k] = v
         return nb_dict
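
Note on the pickling detour: as the comments in dump() and load() indicate, the numba typed dict is converted to a plain dict before pickling and re-typed after loading via numbify_dict. A self-contained sketch of that round-trip, assuming only that numba is installed:

    import pickle

    import numba as nb

    def to_typed(d):
        # Same shape as numbify_dict: str keys, int64 values.
        nb_dict = nb.typed.Dict.empty(
            key_type=nb.types.unicode_type, value_type=nb.types.int64
        )
        for k, v in d.items():
            nb_dict[k] = v
        return nb_dict

    typed = to_typed({"mathieu": 42})
    payload = pickle.dumps(dict(typed))         # plain dict pickles reliably
    restored = to_typed(pickle.loads(payload))  # re-typed after loading
    assert restored["mathieu"] == 42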
@@ -473,6 +653,21 @@ class LDB(DB):
 
 @dataclass(repr=False)
 class LDBAuthor(Author, LDB):
+    """
+    Author from the LDB (Local DBLP) database.
+
+    LDB provides local access to DBLP data without rate limiting.
+
+    Parameters
+    ----------
+    name: :class:`str`
+        The author's name.
+    key: :class:`str`
+        DBLP person identifier (pid).
+    aliases: :class:`list`
+        Alternative names for the author.
+    """
+
     key: str
     aliases: list = field(default_factory=list)
 
@@ -484,9 +679,29 @@ class LDBAuthor(Author, LDB):
         return LDB.from_author(self)
 
 
-
 @dataclass(repr=False)
 class LDBPublication(Publication, LDB):
+    """
+    Publication from the LDB (Local DBLP) database.
+
+    Parameters
+    ----------
+    title: :class:`str`
+        Publication title.
+    authors: :class:`list`
+        List of :class:`LDBAuthor` objects.
+    venue: :class:`str`
+        Publication venue.
+    type: :class:`str`
+        Publication type.
+    year: :class:`int`
+        Publication year.
+    key: :class:`str`
+        DBLP record key.
+    metadata: :class:`dict`
+        Additional metadata (URL, streams, pages).
+    """
+
     key: str
     metadata: dict = field(default_factory=dict)
 
@@ -497,5 +712,5 @@ class LDBPublication(Publication, LDB):
     @property
     def stream(self):
         if "streams" in self.metadata:
-            return f'https://dblp.org/streams/{self.metadata["streams"][0]}'
+            return f"https://dblp.org/streams/{self.metadata['streams'][0]}"
         return None