gismap 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/__init__.py +1 -0
- gismap/build.py +26 -0
- gismap/gisgraphs/builder.py +23 -8
- gismap/gisgraphs/graph.py +15 -0
- gismap/gisgraphs/widget.py +23 -3
- gismap/lab/egomap.py +29 -7
- gismap/lab/expansion.py +35 -1
- gismap/lab/lab_author.py +34 -7
- gismap/lab/labmap.py +46 -6
- gismap/lab_examples/cedric.py +19 -6
- gismap/lab_examples/lamsade.py +45 -0
- gismap/lab_examples/toulouse.py +6 -2
- gismap/search.py +61 -1
- gismap/sources/dblp.py +23 -1
- gismap/sources/dblp_ttl.py +187 -0
- gismap/sources/hal.py +40 -2
- gismap/sources/ldb.py +716 -0
- gismap/sources/models.py +83 -0
- gismap/sources/multi.py +67 -2
- gismap/utils/common.py +73 -11
- gismap/utils/logger.py +2 -0
- gismap/utils/requests.py +3 -1
- gismap/utils/text.py +1 -1
- gismap/utils/zlist.py +88 -0
- {gismap-0.3.0.dist-info → gismap-0.4.1.dist-info}/METADATA +26 -11
- gismap-0.4.1.dist-info/RECORD +43 -0
- {gismap-0.3.0.dist-info → gismap-0.4.1.dist-info}/WHEEL +1 -1
- gismap-0.3.0.dist-info/RECORD +0 -38
- {gismap-0.3.0.dist-info → gismap-0.4.1.dist-info}/licenses/AUTHORS.md +0 -0
gismap/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@ from importlib.metadata import metadata
|
|
|
4
4
|
|
|
5
5
|
from gismap.sources.hal import HAL as HAL, HALAuthor as HALAuthor
|
|
6
6
|
from gismap.sources.dblp import DBLP as DBLP, DBLPAuthor as DBLPAuthor
|
|
7
|
+
from gismap.sources.ldb import LDB as LDB, LDBAuthor as LDBAuthor
|
|
7
8
|
from gismap.utils.common import get_classes as get_classes
|
|
8
9
|
from gismap.gismo import make_gismo as make_gismo
|
|
9
10
|
from gismap.search import (
|
gismap/build.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Build script for creating the LDB (Local DBLP) database.
|
|
3
|
+
|
|
4
|
+
Run as a module to download and process the DBLP dataset:
|
|
5
|
+
|
|
6
|
+
python -m gismap.build
|
|
7
|
+
|
|
8
|
+
This will fetch the DBLP RDF dump from the website and create a compressed local database.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
if __name__ == "__main__":
|
|
12
|
+
import argparse
|
|
13
|
+
from gismap.sources.ldb import LDB
|
|
14
|
+
|
|
15
|
+
parser = argparse.ArgumentParser(
|
|
16
|
+
description="Build LDB database from DBLP TTL dump."
|
|
17
|
+
)
|
|
18
|
+
parser.add_argument(
|
|
19
|
+
"--no-search",
|
|
20
|
+
action="store_true",
|
|
21
|
+
help="Exclude search engine from output (for GitHub assets).",
|
|
22
|
+
)
|
|
23
|
+
args = parser.parse_args()
|
|
24
|
+
|
|
25
|
+
LDB.build_db()
|
|
26
|
+
LDB.dump_db(include_search=not args.no_search)
|
gismap/gisgraphs/builder.py
CHANGED
|
@@ -27,22 +27,37 @@ gislink = tags.a(
|
|
|
27
27
|
|
|
28
28
|
def make_vis(lab, **kwargs):
|
|
29
29
|
"""
|
|
30
|
+
Generate HTML visualization of a lab's collaboration network.
|
|
31
|
+
|
|
30
32
|
Parameters
|
|
31
33
|
----------
|
|
32
34
|
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
33
35
|
Lab to display.
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
uid: :class:`str`, optional
|
|
37
|
+
Unique identifier for DOM elements. Auto-generated if not provided.
|
|
38
|
+
vis_url: :class:`str`, optional
|
|
39
|
+
URL to vis-network library.
|
|
40
|
+
groups: :class:`dict`, optional
|
|
41
|
+
Group styling configuration.
|
|
42
|
+
draw_legend: :class:`bool`, optional
|
|
43
|
+
Whether to draw the legend. Defaults to True if multiple groups.
|
|
44
|
+
physics: :class:`dict`, optional
|
|
45
|
+
Physics engine configuration.
|
|
46
|
+
nodes_options: :class:`dict`, optional
|
|
47
|
+
Node styling options.
|
|
48
|
+
edges_options: :class:`dict`, optional
|
|
49
|
+
Edge styling options.
|
|
50
|
+
interaction_options: :class:`dict`, optional
|
|
51
|
+
Interaction settings.
|
|
52
|
+
style: :class:`string.Template`, optional
|
|
53
|
+
CSS template.
|
|
54
|
+
script: :class:`string.Template`, optional
|
|
55
|
+
JavaScript template.
|
|
41
56
|
|
|
42
57
|
Returns
|
|
43
58
|
-------
|
|
44
59
|
:class:`str`
|
|
45
|
-
HTML code.
|
|
60
|
+
HTML code as a string.
|
|
46
61
|
"""
|
|
47
62
|
uid = kwargs.pop("uid", None)
|
|
48
63
|
if uid is None:
|
gismap/gisgraphs/graph.py
CHANGED
|
@@ -21,6 +21,21 @@ def initials(name):
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def linkify(name, url):
|
|
24
|
+
"""
|
|
25
|
+
Wrap a name in an HTML link if URL is provided.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
name : :class:`str`
|
|
30
|
+
Display text.
|
|
31
|
+
url : :class:`str` or None
|
|
32
|
+
Target URL, or None for no link.
|
|
33
|
+
|
|
34
|
+
Returns
|
|
35
|
+
-------
|
|
36
|
+
:class:`str`
|
|
37
|
+
HTML anchor tag or span.
|
|
38
|
+
"""
|
|
24
39
|
if url:
|
|
25
40
|
return f'<a href="{url}" target="_blank">{name}</a>'
|
|
26
41
|
else:
|
gismap/gisgraphs/widget.py
CHANGED
|
@@ -97,6 +97,14 @@ class GismapWidget:
|
|
|
97
97
|
self.show = True
|
|
98
98
|
|
|
99
99
|
def html(self):
|
|
100
|
+
"""
|
|
101
|
+
Generate the HTML visualization based on widget inputs.
|
|
102
|
+
|
|
103
|
+
Returns
|
|
104
|
+
-------
|
|
105
|
+
:class:`str`
|
|
106
|
+
HTML content for the collaboration graph.
|
|
107
|
+
"""
|
|
100
108
|
dbs = (
|
|
101
109
|
"hal"
|
|
102
110
|
if self.dbs.value == "HAL"
|
|
@@ -128,6 +136,20 @@ class GismapWidget:
|
|
|
128
136
|
return lab.html()
|
|
129
137
|
|
|
130
138
|
def compute_function(self, b, show=True):
|
|
139
|
+
"""
|
|
140
|
+
Handle compute button click and generate visualization.
|
|
141
|
+
|
|
142
|
+
Parameters
|
|
143
|
+
----------
|
|
144
|
+
b : :class:`ipywidgets.Button`
|
|
145
|
+
The button widget that triggered this callback.
|
|
146
|
+
show : :class:`bool`, default=True
|
|
147
|
+
Whether to display the result in the widget.
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
None
|
|
152
|
+
"""
|
|
131
153
|
self.show = show
|
|
132
154
|
full = self.html()
|
|
133
155
|
b64 = base64.b64encode(
|
|
@@ -135,9 +157,7 @@ class GismapWidget:
|
|
|
135
157
|
).decode("utf8")
|
|
136
158
|
payload = f"data:text/html;base64,{b64}"
|
|
137
159
|
savename = safe_filename(self.names.value)
|
|
138
|
-
link_html =
|
|
139
|
-
f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
|
|
140
|
-
)
|
|
160
|
+
link_html = f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
|
|
141
161
|
self.save_link.value = link_html
|
|
142
162
|
if show:
|
|
143
163
|
self.out.clear_output()
|
gismap/lab/egomap.py
CHANGED
|
@@ -4,19 +4,27 @@ from gismap.lab.lab_author import LabAuthor
|
|
|
4
4
|
|
|
5
5
|
class EgoMap(LabMap):
|
|
6
6
|
"""
|
|
7
|
+
Egocentric view of a researcher's collaboration network.
|
|
8
|
+
|
|
9
|
+
Displays the *star* (central researcher), their *planets* (direct co-authors),
|
|
10
|
+
and optionally *moons* (co-authors of co-authors).
|
|
11
|
+
|
|
7
12
|
Parameters
|
|
8
13
|
----------
|
|
9
|
-
star
|
|
10
|
-
|
|
11
|
-
|
|
14
|
+
star: :class:`str` or :class:`~gismap.lab.lab_author.LabAuthor`
|
|
15
|
+
The central researcher. Can be a name string or LabAuthor object.
|
|
16
|
+
*args
|
|
17
|
+
Passed to :class:`~gismap.lab.labmap.LabMap`.
|
|
18
|
+
**kwargs
|
|
19
|
+
Passed to :class:`~gismap.lab.labmap.LabMap`.
|
|
12
20
|
|
|
13
21
|
Examples
|
|
14
22
|
--------
|
|
15
23
|
|
|
16
|
-
>>> dang = EgoMap("The-Dang Huynh"
|
|
17
|
-
>>> dang.build(target=
|
|
18
|
-
>>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
|
19
|
-
['Bruno Kauffmann', '
|
|
24
|
+
>>> dang = EgoMap("The-Dang Huynh")
|
|
25
|
+
>>> dang.build(target=20)
|
|
26
|
+
>>> sorted(a.name for a in dang.authors.values() if len(a.name.split())<3) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
|
27
|
+
['Bruno Kauffmann', 'Diego Perino', 'Dohy Hong', 'Fabien Mathieu', 'François Baccelli',...]
|
|
20
28
|
"""
|
|
21
29
|
|
|
22
30
|
def __init__(self, star, *args, **kwargs):
|
|
@@ -31,6 +39,20 @@ class EgoMap(LabMap):
|
|
|
31
39
|
yield self.star
|
|
32
40
|
|
|
33
41
|
def build(self, **kwargs):
|
|
42
|
+
"""
|
|
43
|
+
Build the ego network by fetching publications and adding planets/moons.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
target : :class:`int`, default=50
|
|
48
|
+
Target number of authors in the final map.
|
|
49
|
+
**kwargs
|
|
50
|
+
Passed to :meth:`~gismap.lab.labmap.LabMap.expand`.
|
|
51
|
+
|
|
52
|
+
Returns
|
|
53
|
+
-------
|
|
54
|
+
None
|
|
55
|
+
"""
|
|
34
56
|
target = kwargs.pop("target", 50)
|
|
35
57
|
self.update_authors(desc="Star metadata")
|
|
36
58
|
self.update_publis(desc="Star publications")
|
gismap/lab/expansion.py
CHANGED
|
@@ -139,7 +139,14 @@ def get_prospects(lab):
|
|
|
139
139
|
@dataclass
|
|
140
140
|
class Member:
|
|
141
141
|
"""
|
|
142
|
-
Basic information
|
|
142
|
+
Basic information about a lab member for name matching.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
name : :class:`str`
|
|
147
|
+
Normalized name.
|
|
148
|
+
key : :class:`str`
|
|
149
|
+
Author key.
|
|
143
150
|
"""
|
|
144
151
|
|
|
145
152
|
name: str
|
|
@@ -190,6 +197,33 @@ def trim_sources(author):
|
|
|
190
197
|
def proper_prospects(
|
|
191
198
|
lab, length_impact=0.05, threshold=80, n_range=4, max_new=None, trim=True
|
|
192
199
|
):
|
|
200
|
+
"""
|
|
201
|
+
Find and rank external collaborators for potential lab expansion.
|
|
202
|
+
|
|
203
|
+
Identifies authors from publications who are not already lab members,
|
|
204
|
+
groups them by name similarity, and ranks by collaboration strength.
|
|
205
|
+
|
|
206
|
+
Parameters
|
|
207
|
+
----------
|
|
208
|
+
lab : :class:`~gismap.lab.labmap.LabMap`
|
|
209
|
+
Reference lab.
|
|
210
|
+
length_impact : :class:`float`, default=0.05
|
|
211
|
+
Length impact for name similarity matching.
|
|
212
|
+
threshold : :class:`int`, default=80
|
|
213
|
+
Similarity threshold for grouping authors.
|
|
214
|
+
n_range : :class:`int`, default=4
|
|
215
|
+
N-gram range for name comparison.
|
|
216
|
+
max_new : :class:`int`, optional
|
|
217
|
+
Maximum number of new authors to return.
|
|
218
|
+
trim : :class:`bool`, default=True
|
|
219
|
+
If True, keep only one source per database for each author.
|
|
220
|
+
|
|
221
|
+
Returns
|
|
222
|
+
-------
|
|
223
|
+
:class:`tuple`
|
|
224
|
+
(existing, new_rosetta) where existing maps external keys to lab member keys,
|
|
225
|
+
and new_rosetta maps source keys to new LabAuthor objects.
|
|
226
|
+
"""
|
|
193
227
|
member_names = get_member_names(lab)
|
|
194
228
|
prospects = get_prospects(lab)
|
|
195
229
|
|
gismap/lab/lab_author.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
2
|
import re
|
|
3
3
|
|
|
4
|
-
from gismap import get_classes
|
|
4
|
+
from gismap import get_classes
|
|
5
5
|
from gismap.sources.models import DB, db_class_to_auth_class
|
|
6
6
|
from gismap.sources.multi import SourcedAuthor, sort_author_sources
|
|
7
7
|
from gismap.utils.common import LazyRepr, list_of_objects
|
|
8
8
|
from gismap.utils.logger import logger
|
|
9
9
|
|
|
10
10
|
db_dict = get_classes(DB, key="db_name")
|
|
11
|
-
default_dbs = [
|
|
11
|
+
default_dbs = ["hal", "ldb"]
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
@dataclass(repr=False)
|
|
@@ -27,8 +27,6 @@ class AuthorMetadata(LazyRepr):
|
|
|
27
27
|
Group of the author.
|
|
28
28
|
position: :class:`tuple`
|
|
29
29
|
Coordinates of the author.
|
|
30
|
-
keys: :class:`dict`
|
|
31
|
-
Some DB key values of the author.
|
|
32
30
|
"""
|
|
33
31
|
|
|
34
32
|
url: str = None
|
|
@@ -46,12 +44,11 @@ class LabAuthor(SourcedAuthor):
|
|
|
46
44
|
|
|
47
45
|
Improper key/values are ignored (with a warning).
|
|
48
46
|
|
|
49
|
-
|
|
50
|
-
>>> dummy= LabAuthor("My Name(img: https://my.url.img, group:me,url:https://mysite.org,hal:key1,dblp:toto,badkey:hello,no_colon_separator)")
|
|
47
|
+
>>> dummy= LabAuthor("My Name(img: https://my.url.img, group:me,url:https://mysite.org,hal:key1,ldb:toto,badkey:hello,no_colon_separator)")
|
|
51
48
|
>>> dummy.metadata
|
|
52
49
|
AuthorMetadata(url='https://mysite.org', img='https://my.url.img', group='me')
|
|
53
50
|
>>> dummy.sources
|
|
54
|
-
[HALAuthor(name='My Name', key='key1'),
|
|
51
|
+
[HALAuthor(name='My Name', key='key1'), LDBAuthor(name='My Name', key='toto')]
|
|
55
52
|
|
|
56
53
|
You can enter multiple keys for the same DB. HAL key types are automatically detected.
|
|
57
54
|
|
|
@@ -59,6 +56,7 @@ class LabAuthor(SourcedAuthor):
|
|
|
59
56
|
>>> dummy2.sources
|
|
60
57
|
[HALAuthor(name='My Name', key='key1'), HALAuthor(name='My Name', key='123456', key_type='pid'), HALAuthor(name='My Name', key='My Other Name', key_type='fullname')]
|
|
61
58
|
"""
|
|
59
|
+
|
|
62
60
|
metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
|
|
63
61
|
|
|
64
62
|
def auto_img(self):
|
|
@@ -119,12 +117,41 @@ class LabAuthor(SourcedAuthor):
|
|
|
119
117
|
|
|
120
118
|
|
|
121
119
|
def labify_author(author, rosetta):
|
|
120
|
+
"""
|
|
121
|
+
Convert a database author to a LabAuthor if possible.
|
|
122
|
+
|
|
123
|
+
Parameters
|
|
124
|
+
----------
|
|
125
|
+
author : :class:`~gismap.sources.models.Author`
|
|
126
|
+
Author to convert.
|
|
127
|
+
rosetta : :class:`dict`
|
|
128
|
+
Mapping from keys/names to LabAuthor objects.
|
|
129
|
+
|
|
130
|
+
Returns
|
|
131
|
+
-------
|
|
132
|
+
:class:`~gismap.lab.lab_author.LabAuthor` or original author
|
|
133
|
+
LabAuthor if found in rosetta, otherwise the original author.
|
|
134
|
+
"""
|
|
122
135
|
if isinstance(author, LabAuthor):
|
|
123
136
|
return author
|
|
124
137
|
return rosetta.get(author.key, rosetta.get(author.name, author))
|
|
125
138
|
|
|
126
139
|
|
|
127
140
|
def labify_publications(pubs, rosetta):
|
|
141
|
+
"""
|
|
142
|
+
Convert publication authors to LabAuthors in place.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
pubs : :class:`list`
|
|
147
|
+
Publications to update.
|
|
148
|
+
rosetta : :class:`dict`
|
|
149
|
+
Mapping from keys/names to LabAuthor objects.
|
|
150
|
+
|
|
151
|
+
Returns
|
|
152
|
+
-------
|
|
153
|
+
None
|
|
154
|
+
"""
|
|
128
155
|
for pub in pubs:
|
|
129
156
|
pub.authors = [labify_author(a, rosetta) for a in pub.authors]
|
|
130
157
|
for source in getattr(pub, "sources", []):
|
gismap/lab/labmap.py
CHANGED
|
@@ -38,7 +38,7 @@ class LabMap(MixInIO):
|
|
|
38
38
|
----------
|
|
39
39
|
name: :class:`str`
|
|
40
40
|
Name of the lab. Can be set as class or instance attribute.
|
|
41
|
-
dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.
|
|
41
|
+
dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.ldb.LDB`]
|
|
42
42
|
List of DB sources to use.
|
|
43
43
|
|
|
44
44
|
|
|
@@ -57,8 +57,7 @@ class LabMap(MixInIO):
|
|
|
57
57
|
def __init__(self, name=None, dbs=None):
|
|
58
58
|
if name is not None:
|
|
59
59
|
self.name = name
|
|
60
|
-
|
|
61
|
-
self.dbs = list_of_objects(dbs, db_dict, default=default_dbs)
|
|
60
|
+
self.dbs = dbs
|
|
62
61
|
self.author_selectors = [author_taboo_filter()]
|
|
63
62
|
self.publication_selectors = [
|
|
64
63
|
publication_size_filter(),
|
|
@@ -92,7 +91,9 @@ class LabMap(MixInIO):
|
|
|
92
91
|
if not all(f(author) for f in self.author_selectors):
|
|
93
92
|
continue
|
|
94
93
|
if len(author.sources) == 0:
|
|
95
|
-
author.auto_sources(
|
|
94
|
+
author.auto_sources(
|
|
95
|
+
dbs=list_of_objects(self.dbs, db_dict, default=default_dbs)
|
|
96
|
+
)
|
|
96
97
|
if author.sources:
|
|
97
98
|
self.authors[author.key] = author
|
|
98
99
|
if author.metadata.img is None:
|
|
@@ -153,9 +154,36 @@ class LabMap(MixInIO):
|
|
|
153
154
|
return None
|
|
154
155
|
|
|
155
156
|
def html(self, **kwargs):
|
|
157
|
+
"""
|
|
158
|
+
Generate HTML representation of the collaboration graph.
|
|
159
|
+
|
|
160
|
+
Parameters
|
|
161
|
+
----------
|
|
162
|
+
**kwargs
|
|
163
|
+
Passed to :func:`~gismap.gisgraphs.builder.make_vis`.
|
|
164
|
+
|
|
165
|
+
Returns
|
|
166
|
+
-------
|
|
167
|
+
:class:`str`
|
|
168
|
+
HTML content as a string.
|
|
169
|
+
"""
|
|
156
170
|
return make_vis(self, **kwargs)
|
|
157
171
|
|
|
158
172
|
def save_html(self, name=None, **kwargs):
|
|
173
|
+
"""
|
|
174
|
+
Save the collaboration graph as an HTML file.
|
|
175
|
+
|
|
176
|
+
Parameters
|
|
177
|
+
----------
|
|
178
|
+
name: :class:`str`, optional
|
|
179
|
+
Output filename. Defaults to lab name.
|
|
180
|
+
**kwargs
|
|
181
|
+
Passed to :meth:`html`.
|
|
182
|
+
|
|
183
|
+
Returns
|
|
184
|
+
-------
|
|
185
|
+
None
|
|
186
|
+
"""
|
|
159
187
|
if name is None:
|
|
160
188
|
name = self.name
|
|
161
189
|
name = Path(name).with_suffix(".html")
|
|
@@ -163,6 +191,18 @@ class LabMap(MixInIO):
|
|
|
163
191
|
f.write(self.html(**kwargs))
|
|
164
192
|
|
|
165
193
|
def show_html(self, **kwargs):
|
|
194
|
+
"""
|
|
195
|
+
Display the collaboration graph in a Jupyter notebook.
|
|
196
|
+
|
|
197
|
+
Parameters
|
|
198
|
+
----------
|
|
199
|
+
**kwargs
|
|
200
|
+
Passed to :meth:`html`.
|
|
201
|
+
|
|
202
|
+
Returns
|
|
203
|
+
-------
|
|
204
|
+
None
|
|
205
|
+
"""
|
|
166
206
|
display(HTML(self.html(**kwargs)))
|
|
167
207
|
|
|
168
208
|
|
|
@@ -175,9 +215,9 @@ class ListMap(LabMap):
|
|
|
175
215
|
author_list: :class:`list` of :class:`str`
|
|
176
216
|
List of authors names.
|
|
177
217
|
args: :class:`list`
|
|
178
|
-
Arguments to pass to the :class:`~gismap.lab.lab.Lab`
|
|
218
|
+
Arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
|
|
179
219
|
kwargs: :class:`dict`
|
|
180
|
-
Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab`
|
|
220
|
+
Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
|
|
181
221
|
"""
|
|
182
222
|
|
|
183
223
|
def __init__(self, author_list, *args, **kwargs):
|
gismap/lab_examples/cedric.py
CHANGED
|
@@ -10,37 +10,50 @@ class CedricMap(LabMap):
|
|
|
10
10
|
Class for handling a CNAM Cedric team from its name.
|
|
11
11
|
Default to `roc` team.
|
|
12
12
|
"""
|
|
13
|
+
|
|
13
14
|
name = "roc"
|
|
14
15
|
base_url = "https://cedric.cnam.fr"
|
|
15
16
|
|
|
16
17
|
def _author_iterator(self):
|
|
17
18
|
soup = Soup(get(f"{self.base_url}/equipes/{self.name}/"), features="lxml")
|
|
18
|
-
searchers = [
|
|
19
|
+
searchers = [
|
|
20
|
+
li.a
|
|
21
|
+
for ul in soup.find("div", {"id": "annuaire"})("ul")[:3]
|
|
22
|
+
for li in ul("li")
|
|
23
|
+
]
|
|
19
24
|
done = set()
|
|
20
25
|
for searcher in searchers:
|
|
21
|
-
name = searcher.text.split(
|
|
26
|
+
name = searcher.text.split("(")[0].strip()
|
|
22
27
|
if name in done:
|
|
23
28
|
continue
|
|
24
29
|
url = f"{self.base_url}{searcher['href']}"
|
|
25
30
|
sousoup = Soup(get(url), features="lxml")
|
|
26
|
-
img = sousoup.find(
|
|
31
|
+
img = sousoup.find("img", {"class": "photo"})["src"]
|
|
27
32
|
response = requests.head(img, allow_redirects=True)
|
|
28
33
|
if int(response.headers.get("Content-Length")) < 3000:
|
|
29
34
|
img = None
|
|
30
35
|
done.add(name)
|
|
31
|
-
yield LabAuthor(
|
|
36
|
+
yield LabAuthor(
|
|
37
|
+
name=name,
|
|
38
|
+
metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
|
|
39
|
+
)
|
|
32
40
|
|
|
33
41
|
|
|
34
42
|
class CedricFull(LabMap):
|
|
35
43
|
"""
|
|
36
44
|
Class for handling all CNAM Cedric teams using `https://cedric.cnam.fr/equipes` to get team names.
|
|
37
45
|
"""
|
|
46
|
+
|
|
38
47
|
name = "Cedric"
|
|
39
48
|
|
|
40
49
|
def _author_iterator(self):
|
|
41
50
|
base = "https://cedric.cnam.fr/equipes/"
|
|
42
|
-
soup =
|
|
43
|
-
teams = {
|
|
51
|
+
soup = Soup(get(base), features="lxml")
|
|
52
|
+
teams = {
|
|
53
|
+
a["href"].split("/")[-2]
|
|
54
|
+
for a in soup("a")
|
|
55
|
+
if base in a.get("href", "") and len(a["href"]) > len(base)
|
|
56
|
+
}
|
|
44
57
|
for team in teams:
|
|
45
58
|
for author in CedricMap(name=team)._author_iterator():
|
|
46
59
|
yield author
|
gismap/lab_examples/lamsade.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from bs4 import BeautifulSoup as Soup
|
|
2
|
+
|
|
3
|
+
from gismap.lab import LabAuthor
|
|
4
|
+
from gismap.lab.lab_author import AuthorMetadata
|
|
5
|
+
from gismap.lab.labmap import LabMap
|
|
6
|
+
from gismap.utils.requests import get
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def lamsade_parse(div):
|
|
10
|
+
"""
|
|
11
|
+
Parameters
|
|
12
|
+
----------
|
|
13
|
+
div: :class:`~bs4.BeautifulSoup`
|
|
14
|
+
Soup of the div of one researcher
|
|
15
|
+
|
|
16
|
+
Returns
|
|
17
|
+
-------
|
|
18
|
+
:class:`tuple`
|
|
19
|
+
name, image url (or None), webpage (or None)
|
|
20
|
+
"""
|
|
21
|
+
img = div.img["src"] if div.img else None
|
|
22
|
+
url = div.a["href"] if div.a else None
|
|
23
|
+
name = div.h2.text.strip().title()
|
|
24
|
+
name = " ".join(name.split(" ", 1)[::-1])
|
|
25
|
+
return name, img, url
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Lamsade(LabMap):
|
|
29
|
+
"""
|
|
30
|
+
Class for handling the Lamsade team (Dauphine).
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
name = "Lamsade"
|
|
34
|
+
base_url = "https://www.lamsade.dauphine.fr/"
|
|
35
|
+
directory = "fr/personnes/enseignants-chercheurs-et-chercheurs.html"
|
|
36
|
+
|
|
37
|
+
def _author_iterator(self):
|
|
38
|
+
soup = Soup(get(self.base_url + self.directory), features="lxml")
|
|
39
|
+
for a in soup("div", class_="dauphinecv-item"):
|
|
40
|
+
name, img, url = lamsade_parse(a)
|
|
41
|
+
img = self.base_url + img if img else None
|
|
42
|
+
url = self.base_url + url if url else None
|
|
43
|
+
yield LabAuthor(
|
|
44
|
+
name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name)
|
|
45
|
+
)
|
gismap/lab_examples/toulouse.py
CHANGED
|
@@ -29,18 +29,22 @@ class LaasMap(LabMap):
|
|
|
29
29
|
if "public_avatar" in a.img["class"]
|
|
30
30
|
else None
|
|
31
31
|
)
|
|
32
|
-
yield LabAuthor(
|
|
32
|
+
yield LabAuthor(
|
|
33
|
+
name=name,
|
|
34
|
+
metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
|
|
35
|
+
)
|
|
33
36
|
|
|
34
37
|
|
|
35
38
|
class LaasFull(LabMap):
|
|
36
39
|
"""
|
|
37
40
|
Class for handling all LAAS teams using `https://www.laas.fr/fr/equipes/` to get team names.
|
|
38
41
|
"""
|
|
42
|
+
|
|
39
43
|
name = "LAAS"
|
|
40
44
|
|
|
41
45
|
def _author_iterator(self):
|
|
42
46
|
soup = Soup(get("https://www.laas.fr/fr/equipes/"), features="lxml")
|
|
43
|
-
teams = [a[
|
|
47
|
+
teams = [a["href"].split("/")[-2] for a in soup("a", {"class": "badge"})]
|
|
44
48
|
for team in teams:
|
|
45
49
|
for author in LaasMap(name=team)._author_iterator():
|
|
46
50
|
yield author
|
gismap/search.py
CHANGED
|
@@ -6,7 +6,17 @@ from gismap.utils.text import reduce_keywords, Corrector
|
|
|
6
6
|
|
|
7
7
|
class SearchAction:
|
|
8
8
|
"""
|
|
9
|
-
|
|
9
|
+
Base class for extracting search results from a Gismo.
|
|
10
|
+
|
|
11
|
+
Subclasses should implement :meth:`process` to define
|
|
12
|
+
how to extract results from the gismo.
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
name : :class:`str`, optional
|
|
17
|
+
Name of this action (used as key in results dict).
|
|
18
|
+
post : callable, optional
|
|
19
|
+
Post-processing function applied to results.
|
|
10
20
|
"""
|
|
11
21
|
|
|
12
22
|
def __init__(self, name=None, post=None):
|
|
@@ -14,9 +24,33 @@ class SearchAction:
|
|
|
14
24
|
self.post = (lambda x: x) if post is None else post
|
|
15
25
|
|
|
16
26
|
def process(self, gismo):
|
|
27
|
+
"""
|
|
28
|
+
Extract results from the gismo. Must be implemented by subclasses.
|
|
29
|
+
|
|
30
|
+
Parameters
|
|
31
|
+
----------
|
|
32
|
+
gismo : :class:`~gismo.gismo.Gismo`
|
|
33
|
+
The gismo to query.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
Results (type depends on subclass).
|
|
38
|
+
"""
|
|
17
39
|
raise NotImplementedError
|
|
18
40
|
|
|
19
41
|
def run(self, gismo):
|
|
42
|
+
"""
|
|
43
|
+
Execute the action and apply post-processing.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
gismo : :class:`~gismo.gismo.Gismo`
|
|
48
|
+
The gismo to query.
|
|
49
|
+
|
|
50
|
+
Returns
|
|
51
|
+
-------
|
|
52
|
+
Post-processed results.
|
|
53
|
+
"""
|
|
20
54
|
return self.post(self.process(gismo))
|
|
21
55
|
|
|
22
56
|
|
|
@@ -153,6 +187,19 @@ publi_template = Template("""
|
|
|
153
187
|
|
|
154
188
|
|
|
155
189
|
def publi_to_html(publi):
|
|
190
|
+
"""
|
|
191
|
+
Convert a publication to an HTML list item.
|
|
192
|
+
|
|
193
|
+
Parameters
|
|
194
|
+
----------
|
|
195
|
+
publi : :class:`~gismap.sources.models.Publication`
|
|
196
|
+
Publication to convert.
|
|
197
|
+
|
|
198
|
+
Returns
|
|
199
|
+
-------
|
|
200
|
+
:class:`str`
|
|
201
|
+
HTML list item string.
|
|
202
|
+
"""
|
|
156
203
|
dico = dict()
|
|
157
204
|
for db in ["hal", "dblp"]:
|
|
158
205
|
source = publi.sources.get(db)
|
|
@@ -167,6 +214,19 @@ def publi_to_html(publi):
|
|
|
167
214
|
|
|
168
215
|
|
|
169
216
|
def publis_to_html(publis):
|
|
217
|
+
"""
|
|
218
|
+
Convert a list of publications to an HTML unordered list.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
publis : :class:`list`
|
|
223
|
+
List of publications.
|
|
224
|
+
|
|
225
|
+
Returns
|
|
226
|
+
-------
|
|
227
|
+
:class:`str`
|
|
228
|
+
HTML unordered list string.
|
|
229
|
+
"""
|
|
170
230
|
rows = "\n".join(publi_to_html(p) for p in publis)
|
|
171
231
|
return f"<ul>\n{rows}\n</ul>"
|
|
172
232
|
|