gismap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/build.py +23 -1
- gismap/gisgraphs/builder.py +23 -8
- gismap/gisgraphs/graph.py +15 -0
- gismap/gisgraphs/widget.py +28 -8
- gismap/lab/egomap.py +29 -7
- gismap/lab/expansion.py +35 -1
- gismap/lab/lab_author.py +30 -0
- gismap/lab/labmap.py +44 -3
- gismap/lab_examples/cedric.py +19 -6
- gismap/lab_examples/lamsade.py +9 -7
- gismap/lab_examples/toulouse.py +6 -2
- gismap/search.py +61 -1
- gismap/sources/dblp.py +22 -0
- gismap/sources/dblp_ttl.py +30 -11
- gismap/sources/hal.py +38 -0
- gismap/sources/ldb.py +315 -100
- gismap/sources/models.py +83 -0
- gismap/sources/multi.py +65 -0
- gismap/utils/common.py +58 -1
- gismap/utils/text.py +1 -1
- gismap/utils/zlist.py +24 -4
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/METADATA +11 -9
- gismap-0.4.1.dist-info/RECORD +43 -0
- gismap-0.4.0.dist-info/RECORD +0 -43
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/WHEEL +0 -0
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/licenses/AUTHORS.md +0 -0
gismap/sources/dblp_ttl.py
CHANGED
|
@@ -8,24 +8,39 @@ from tqdm.auto import tqdm
|
|
|
8
8
|
from gismap.utils.requests import session
|
|
9
9
|
from gismap.sources.dblp import DBLP_TYPES
|
|
10
10
|
|
|
11
|
-
key_re = r
|
|
11
|
+
key_re = r"<https://dblp.org/rec/([^>]+)>"
|
|
12
12
|
title_re = r'.*?dblp:title\s+"([^"]+)"'
|
|
13
|
-
type_re = r
|
|
14
|
-
authors_re = r
|
|
15
|
-
url_re = r
|
|
16
|
-
stream_re = r
|
|
13
|
+
type_re = r".*?dblp:bibtexType\s+bibtex:(\w+)"
|
|
14
|
+
authors_re = r".*?dblp:hasSignature\s+(\[.*\])\s*;"
|
|
15
|
+
url_re = r"(?:.*?dblp:primaryDocumentPage <([^>]+)>)?"
|
|
16
|
+
stream_re = r"(?:.*?dblp:publishedInStream ([^;]+) ;)?"
|
|
17
17
|
pages_re = r'(?:.*?dblp:pagination "([^"]+)")?'
|
|
18
18
|
venue_re = r'(?:.*?dblp:publishedIn\s+"([^"]+?)")?'
|
|
19
19
|
year_re = r'.*?"(\d{4})"\^\^<http://www.w3.org/2001/XMLSchema#gYear>'
|
|
20
20
|
|
|
21
|
-
pub_re = re.compile(
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
21
|
+
pub_re = re.compile(
|
|
22
|
+
"".join(
|
|
23
|
+
[
|
|
24
|
+
key_re,
|
|
25
|
+
title_re,
|
|
26
|
+
type_re,
|
|
27
|
+
authors_re,
|
|
28
|
+
url_re,
|
|
29
|
+
stream_re,
|
|
30
|
+
pages_re,
|
|
31
|
+
venue_re,
|
|
32
|
+
year_re,
|
|
33
|
+
]
|
|
34
|
+
),
|
|
35
|
+
flags=re.S,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
streams_re = re.compile(r"<https://dblp.org/streams/((?:conf|journals)/[^>]+)>")
|
|
25
39
|
|
|
26
40
|
authid_re = re.compile(
|
|
27
41
|
r'\[.*?signatureDblpName\s*?"([^"]+?)(?:\s+\d+)?".*?signatureCreator\s*<https://dblp.org/pid/([^>]+?)>.*?]',
|
|
28
|
-
flags=re.S
|
|
42
|
+
flags=re.S,
|
|
43
|
+
)
|
|
29
44
|
|
|
30
45
|
|
|
31
46
|
def parse_block(dblp_block):
|
|
@@ -100,12 +115,14 @@ def get_stream(source, chunk_size=1024 * 64):
|
|
|
100
115
|
return None
|
|
101
116
|
total = source.stat().st_size
|
|
102
117
|
with source.open("rb") as file_handle:
|
|
118
|
+
|
|
103
119
|
def read_chunks():
|
|
104
120
|
while True:
|
|
105
121
|
chunk = file_handle.read(chunk_size)
|
|
106
122
|
if not chunk:
|
|
107
123
|
break
|
|
108
124
|
yield chunk
|
|
125
|
+
|
|
109
126
|
yield read_chunks(), total
|
|
110
127
|
|
|
111
128
|
|
|
@@ -136,7 +153,9 @@ def publis_streamer(source, chunk_size=1024 * 64, encoding="unicode_escape"):
|
|
|
136
153
|
Year of publication.
|
|
137
154
|
"""
|
|
138
155
|
with get_stream(source, chunk_size=chunk_size) as (stream, total):
|
|
139
|
-
with tqdm(
|
|
156
|
+
with tqdm(
|
|
157
|
+
total=total, unit="B", unit_scale=True, unit_divisor=1024, desc="Processing"
|
|
158
|
+
) as pbar:
|
|
140
159
|
decomp = zlib.decompressobj(16 + zlib.MAX_WBITS)
|
|
141
160
|
text_buffer = ""
|
|
142
161
|
for chunk in stream:
|
gismap/sources/hal.py
CHANGED
|
@@ -180,6 +180,23 @@ class HAL(DB):
|
|
|
180
180
|
|
|
181
181
|
@dataclass(repr=False)
|
|
182
182
|
class HALAuthor(Author, HAL):
|
|
183
|
+
"""
|
|
184
|
+
Author from the HAL (Hyper Articles en Ligne) database.
|
|
185
|
+
|
|
186
|
+
HAL is a French open archive for scholarly publications.
|
|
187
|
+
|
|
188
|
+
Parameters
|
|
189
|
+
----------
|
|
190
|
+
name: :class:`str`
|
|
191
|
+
The author's name.
|
|
192
|
+
key: :class:`str` or :class:`int`, optional
|
|
193
|
+
HAL identifier for the author.
|
|
194
|
+
key_type: :class:`str`, optional
|
|
195
|
+
Type of key ('pid', 'fullname', or None for idHal).
|
|
196
|
+
aliases: :class:`list`
|
|
197
|
+
Alternative names for the author.
|
|
198
|
+
"""
|
|
199
|
+
|
|
183
200
|
key: str | int = None
|
|
184
201
|
key_type: str = None
|
|
185
202
|
aliases: list = field(default_factory=list)
|
|
@@ -277,6 +294,27 @@ HAL_KEYS = {
|
|
|
277
294
|
|
|
278
295
|
@dataclass(repr=False)
|
|
279
296
|
class HALPublication(Publication, HAL):
|
|
297
|
+
"""
|
|
298
|
+
Publication from the HAL database.
|
|
299
|
+
|
|
300
|
+
Parameters
|
|
301
|
+
----------
|
|
302
|
+
title: :class:`str`
|
|
303
|
+
Publication title.
|
|
304
|
+
authors: :class:`list`
|
|
305
|
+
List of :class:`HALAuthor` objects.
|
|
306
|
+
venue: :class:`str`
|
|
307
|
+
Publication venue.
|
|
308
|
+
type: :class:`str`
|
|
309
|
+
Publication type.
|
|
310
|
+
year: :class:`int`
|
|
311
|
+
Publication year.
|
|
312
|
+
key: :class:`str`
|
|
313
|
+
HAL document identifier.
|
|
314
|
+
metadata: :class:`dict`
|
|
315
|
+
Additional metadata (abstract, URL, etc.).
|
|
316
|
+
"""
|
|
317
|
+
|
|
280
318
|
key: str
|
|
281
319
|
metadata: dict = field(default_factory=dict)
|
|
282
320
|
|