gismap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- gismap/build.py +23 -1
- gismap/gisgraphs/builder.py +23 -8
- gismap/gisgraphs/graph.py +15 -0
- gismap/gisgraphs/widget.py +28 -8
- gismap/lab/egomap.py +29 -7
- gismap/lab/expansion.py +35 -1
- gismap/lab/lab_author.py +30 -0
- gismap/lab/labmap.py +44 -3
- gismap/lab_examples/cedric.py +19 -6
- gismap/lab_examples/lamsade.py +9 -7
- gismap/lab_examples/toulouse.py +6 -2
- gismap/search.py +61 -1
- gismap/sources/dblp.py +22 -0
- gismap/sources/dblp_ttl.py +30 -11
- gismap/sources/hal.py +38 -0
- gismap/sources/ldb.py +315 -100
- gismap/sources/models.py +83 -0
- gismap/sources/multi.py +65 -0
- gismap/utils/common.py +58 -1
- gismap/utils/text.py +1 -1
- gismap/utils/zlist.py +24 -4
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/METADATA +11 -9
- gismap-0.4.1.dist-info/RECORD +43 -0
- gismap-0.4.0.dist-info/RECORD +0 -43
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/WHEEL +0 -0
- {gismap-0.4.0.dist-info → gismap-0.4.1.dist-info}/licenses/AUTHORS.md +0 -0
gismap/build.py
CHANGED
|
@@ -1,4 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Build script for creating the LDB (Local DBLP) database.
|
|
3
|
+
|
|
4
|
+
Run as a module to download and process the DBLP dataset:
|
|
5
|
+
|
|
6
|
+
python -m gismap.build
|
|
7
|
+
|
|
8
|
+
This will fetch the DBLP RDF dump from the website and create a compressed local database.
|
|
9
|
+
"""
|
|
10
|
+
|
|
1
11
|
if __name__ == "__main__":
|
|
12
|
+
import argparse
|
|
2
13
|
from gismap.sources.ldb import LDB
|
|
14
|
+
|
|
15
|
+
parser = argparse.ArgumentParser(
|
|
16
|
+
description="Build LDB database from DBLP TTL dump."
|
|
17
|
+
)
|
|
18
|
+
parser.add_argument(
|
|
19
|
+
"--no-search",
|
|
20
|
+
action="store_true",
|
|
21
|
+
help="Exclude search engine from output (for GitHub assets).",
|
|
22
|
+
)
|
|
23
|
+
args = parser.parse_args()
|
|
24
|
+
|
|
3
25
|
LDB.build_db()
|
|
4
|
-
LDB.dump_db()
|
|
26
|
+
LDB.dump_db(include_search=not args.no_search)
|
gismap/gisgraphs/builder.py
CHANGED
|
@@ -27,22 +27,37 @@ gislink = tags.a(
|
|
|
27
27
|
|
|
28
28
|
def make_vis(lab, **kwargs):
|
|
29
29
|
"""
|
|
30
|
+
Generate HTML visualization of a lab's collaboration network.
|
|
31
|
+
|
|
30
32
|
Parameters
|
|
31
33
|
----------
|
|
32
34
|
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
33
35
|
Lab to display.
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
uid: :class:`str`, optional
|
|
37
|
+
Unique identifier for DOM elements. Auto-generated if not provided.
|
|
38
|
+
vis_url: :class:`str`, optional
|
|
39
|
+
URL to vis-network library.
|
|
40
|
+
groups: :class:`dict`, optional
|
|
41
|
+
Group styling configuration.
|
|
42
|
+
draw_legend: :class:`bool`, optional
|
|
43
|
+
Whether to draw the legend. Defaults to True if multiple groups.
|
|
44
|
+
physics: :class:`dict`, optional
|
|
45
|
+
Physics engine configuration.
|
|
46
|
+
nodes_options: :class:`dict`, optional
|
|
47
|
+
Node styling options.
|
|
48
|
+
edges_options: :class:`dict`, optional
|
|
49
|
+
Edge styling options.
|
|
50
|
+
interaction_options: :class:`dict`, optional
|
|
51
|
+
Interaction settings.
|
|
52
|
+
style: :class:`string.Template`, optional
|
|
53
|
+
CSS template.
|
|
54
|
+
script: :class:`string.Template`, optional
|
|
55
|
+
JavaScript template.
|
|
41
56
|
|
|
42
57
|
Returns
|
|
43
58
|
-------
|
|
44
59
|
:class:`str`
|
|
45
|
-
HTML code.
|
|
60
|
+
HTML code as a string.
|
|
46
61
|
"""
|
|
47
62
|
uid = kwargs.pop("uid", None)
|
|
48
63
|
if uid is None:
|
gismap/gisgraphs/graph.py
CHANGED
|
@@ -21,6 +21,21 @@ def initials(name):
|
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
def linkify(name, url):
|
|
24
|
+
"""
|
|
25
|
+
Wrap a name in an HTML link if URL is provided.
|
|
26
|
+
|
|
27
|
+
Parameters
|
|
28
|
+
----------
|
|
29
|
+
name : :class:`str`
|
|
30
|
+
Display text.
|
|
31
|
+
url : :class:`str` or None
|
|
32
|
+
Target URL, or None for no link.
|
|
33
|
+
|
|
34
|
+
Returns
|
|
35
|
+
-------
|
|
36
|
+
:class:`str`
|
|
37
|
+
HTML anchor tag or span.
|
|
38
|
+
"""
|
|
24
39
|
if url:
|
|
25
40
|
return f'<a href="{url}" target="_blank">{name}</a>'
|
|
26
41
|
else:
|
gismap/gisgraphs/widget.py
CHANGED
|
@@ -33,7 +33,7 @@ def safe_filename(name):
|
|
|
33
33
|
return f"gismap-{safe_str[:60]}.html"
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand,
|
|
36
|
+
place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand, dblp: 38/11269), Rim Kaddah, Leonardo Linguaglossa, Céline Comte"
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
class GismapWidget:
|
|
@@ -66,7 +66,7 @@ class GismapWidget:
|
|
|
66
66
|
layout=widgets.Layout(width="50%", height="100px"),
|
|
67
67
|
)
|
|
68
68
|
self.dbs = widgets.RadioButtons(
|
|
69
|
-
options=["HAL", "
|
|
69
|
+
options=["HAL", "DBLP", "Both"],
|
|
70
70
|
description="DB(s):",
|
|
71
71
|
layout=widgets.Layout(width="80px", max_width="20%"),
|
|
72
72
|
)
|
|
@@ -97,12 +97,20 @@ class GismapWidget:
|
|
|
97
97
|
self.show = True
|
|
98
98
|
|
|
99
99
|
def html(self):
|
|
100
|
+
"""
|
|
101
|
+
Generate the HTML visualization based on widget inputs.
|
|
102
|
+
|
|
103
|
+
Returns
|
|
104
|
+
-------
|
|
105
|
+
:class:`str`
|
|
106
|
+
HTML content for the collaboration graph.
|
|
107
|
+
"""
|
|
100
108
|
dbs = (
|
|
101
109
|
"hal"
|
|
102
110
|
if self.dbs.value == "HAL"
|
|
103
|
-
else "
|
|
104
|
-
if self.dbs.value == "
|
|
105
|
-
else ["hal", "
|
|
111
|
+
else "dblp"
|
|
112
|
+
if self.dbs.value == "DBLP"
|
|
113
|
+
else ["hal", "dblp"]
|
|
106
114
|
)
|
|
107
115
|
name = self.names.value
|
|
108
116
|
pattern = r",\s*(?![^()]*\))"
|
|
@@ -128,6 +136,20 @@ class GismapWidget:
|
|
|
128
136
|
return lab.html()
|
|
129
137
|
|
|
130
138
|
def compute_function(self, b, show=True):
|
|
139
|
+
"""
|
|
140
|
+
Handle compute button click and generate visualization.
|
|
141
|
+
|
|
142
|
+
Parameters
|
|
143
|
+
----------
|
|
144
|
+
b : :class:`ipywidgets.Button`
|
|
145
|
+
The button widget that triggered this callback.
|
|
146
|
+
show : :class:`bool`, default=True
|
|
147
|
+
Whether to display the result in the widget.
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
None
|
|
152
|
+
"""
|
|
131
153
|
self.show = show
|
|
132
154
|
full = self.html()
|
|
133
155
|
b64 = base64.b64encode(
|
|
@@ -135,9 +157,7 @@ class GismapWidget:
|
|
|
135
157
|
).decode("utf8")
|
|
136
158
|
payload = f"data:text/html;base64,{b64}"
|
|
137
159
|
savename = safe_filename(self.names.value)
|
|
138
|
-
link_html = (
|
|
139
|
-
f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
|
|
140
|
-
)
|
|
160
|
+
link_html = f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
|
|
141
161
|
self.save_link.value = link_html
|
|
142
162
|
if show:
|
|
143
163
|
self.out.clear_output()
|
gismap/lab/egomap.py
CHANGED
|
@@ -4,19 +4,27 @@ from gismap.lab.lab_author import LabAuthor
|
|
|
4
4
|
|
|
5
5
|
class EgoMap(LabMap):
|
|
6
6
|
"""
|
|
7
|
+
Egocentric view of a researcher's collaboration network.
|
|
8
|
+
|
|
9
|
+
Displays the *star* (central researcher), their *planets* (direct co-authors),
|
|
10
|
+
and optionally *moons* (co-authors of co-authors).
|
|
11
|
+
|
|
7
12
|
Parameters
|
|
8
13
|
----------
|
|
9
|
-
star
|
|
10
|
-
|
|
11
|
-
|
|
14
|
+
star: :class:`str` or :class:`~gismap.lab.lab_author.LabAuthor`
|
|
15
|
+
The central researcher. Can be a name string or LabAuthor object.
|
|
16
|
+
*args
|
|
17
|
+
Passed to :class:`~gismap.lab.labmap.LabMap`.
|
|
18
|
+
**kwargs
|
|
19
|
+
Passed to :class:`~gismap.lab.labmap.LabMap`.
|
|
12
20
|
|
|
13
21
|
Examples
|
|
14
22
|
--------
|
|
15
23
|
|
|
16
|
-
>>> dang = EgoMap("The-Dang Huynh"
|
|
17
|
-
>>> dang.build(target=
|
|
18
|
-
>>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
|
19
|
-
['Bruno Kauffmann', '
|
|
24
|
+
>>> dang = EgoMap("The-Dang Huynh")
|
|
25
|
+
>>> dang.build(target=20)
|
|
26
|
+
>>> sorted(a.name for a in dang.authors.values() if len(a.name.split())<3) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
|
27
|
+
['Bruno Kauffmann', 'Diego Perino', 'Dohy Hong', 'Fabien Mathieu', 'François Baccelli',...]
|
|
20
28
|
"""
|
|
21
29
|
|
|
22
30
|
def __init__(self, star, *args, **kwargs):
|
|
@@ -31,6 +39,20 @@ class EgoMap(LabMap):
|
|
|
31
39
|
yield self.star
|
|
32
40
|
|
|
33
41
|
def build(self, **kwargs):
|
|
42
|
+
"""
|
|
43
|
+
Build the ego network by fetching publications and adding planets/moons.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
target : :class:`int`, default=50
|
|
48
|
+
Target number of authors in the final map.
|
|
49
|
+
**kwargs
|
|
50
|
+
Passed to :meth:`~gismap.lab.labmap.LabMap.expand`.
|
|
51
|
+
|
|
52
|
+
Returns
|
|
53
|
+
-------
|
|
54
|
+
None
|
|
55
|
+
"""
|
|
34
56
|
target = kwargs.pop("target", 50)
|
|
35
57
|
self.update_authors(desc="Star metadata")
|
|
36
58
|
self.update_publis(desc="Star publications")
|
gismap/lab/expansion.py
CHANGED
|
@@ -139,7 +139,14 @@ def get_prospects(lab):
|
|
|
139
139
|
@dataclass
|
|
140
140
|
class Member:
|
|
141
141
|
"""
|
|
142
|
-
Basic information
|
|
142
|
+
Basic information about a lab member for name matching.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
name : :class:`str`
|
|
147
|
+
Normalized name.
|
|
148
|
+
key : :class:`str`
|
|
149
|
+
Author key.
|
|
143
150
|
"""
|
|
144
151
|
|
|
145
152
|
name: str
|
|
@@ -190,6 +197,33 @@ def trim_sources(author):
|
|
|
190
197
|
def proper_prospects(
|
|
191
198
|
lab, length_impact=0.05, threshold=80, n_range=4, max_new=None, trim=True
|
|
192
199
|
):
|
|
200
|
+
"""
|
|
201
|
+
Find and rank external collaborators for potential lab expansion.
|
|
202
|
+
|
|
203
|
+
Identifies authors from publications who are not already lab members,
|
|
204
|
+
groups them by name similarity, and ranks by collaboration strength.
|
|
205
|
+
|
|
206
|
+
Parameters
|
|
207
|
+
----------
|
|
208
|
+
lab : :class:`~gismap.lab.labmap.LabMap`
|
|
209
|
+
Reference lab.
|
|
210
|
+
length_impact : :class:`float`, default=0.05
|
|
211
|
+
Length impact for name similarity matching.
|
|
212
|
+
threshold : :class:`int`, default=80
|
|
213
|
+
Similarity threshold for grouping authors.
|
|
214
|
+
n_range : :class:`int`, default=4
|
|
215
|
+
N-gram range for name comparison.
|
|
216
|
+
max_new : :class:`int`, optional
|
|
217
|
+
Maximum number of new authors to return.
|
|
218
|
+
trim : :class:`bool`, default=True
|
|
219
|
+
If True, keep only one source per database for each author.
|
|
220
|
+
|
|
221
|
+
Returns
|
|
222
|
+
-------
|
|
223
|
+
:class:`tuple`
|
|
224
|
+
(existing, new_rosetta) where existing maps external keys to lab member keys,
|
|
225
|
+
and new_rosetta maps source keys to new LabAuthor objects.
|
|
226
|
+
"""
|
|
193
227
|
member_names = get_member_names(lab)
|
|
194
228
|
prospects = get_prospects(lab)
|
|
195
229
|
|
gismap/lab/lab_author.py
CHANGED
|
@@ -56,6 +56,7 @@ class LabAuthor(SourcedAuthor):
|
|
|
56
56
|
>>> dummy2.sources
|
|
57
57
|
[HALAuthor(name='My Name', key='key1'), HALAuthor(name='My Name', key='123456', key_type='pid'), HALAuthor(name='My Name', key='My Other Name', key_type='fullname')]
|
|
58
58
|
"""
|
|
59
|
+
|
|
59
60
|
metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
|
|
60
61
|
|
|
61
62
|
def auto_img(self):
|
|
@@ -116,12 +117,41 @@ class LabAuthor(SourcedAuthor):
|
|
|
116
117
|
|
|
117
118
|
|
|
118
119
|
def labify_author(author, rosetta):
|
|
120
|
+
"""
|
|
121
|
+
Convert a database author to a LabAuthor if possible.
|
|
122
|
+
|
|
123
|
+
Parameters
|
|
124
|
+
----------
|
|
125
|
+
author : :class:`~gismap.sources.models.Author`
|
|
126
|
+
Author to convert.
|
|
127
|
+
rosetta : :class:`dict`
|
|
128
|
+
Mapping from keys/names to LabAuthor objects.
|
|
129
|
+
|
|
130
|
+
Returns
|
|
131
|
+
-------
|
|
132
|
+
:class:`~gismap.lab.lab_author.LabAuthor` or original author
|
|
133
|
+
LabAuthor if found in rosetta, otherwise the original author.
|
|
134
|
+
"""
|
|
119
135
|
if isinstance(author, LabAuthor):
|
|
120
136
|
return author
|
|
121
137
|
return rosetta.get(author.key, rosetta.get(author.name, author))
|
|
122
138
|
|
|
123
139
|
|
|
124
140
|
def labify_publications(pubs, rosetta):
|
|
141
|
+
"""
|
|
142
|
+
Convert publication authors to LabAuthors in place.
|
|
143
|
+
|
|
144
|
+
Parameters
|
|
145
|
+
----------
|
|
146
|
+
pubs : :class:`list`
|
|
147
|
+
Publications to update.
|
|
148
|
+
rosetta : :class:`dict`
|
|
149
|
+
Mapping from keys/names to LabAuthor objects.
|
|
150
|
+
|
|
151
|
+
Returns
|
|
152
|
+
-------
|
|
153
|
+
None
|
|
154
|
+
"""
|
|
125
155
|
for pub in pubs:
|
|
126
156
|
pub.authors = [labify_author(a, rosetta) for a in pub.authors]
|
|
127
157
|
for source in getattr(pub, "sources", []):
|
gismap/lab/labmap.py
CHANGED
|
@@ -91,7 +91,9 @@ class LabMap(MixInIO):
|
|
|
91
91
|
if not all(f(author) for f in self.author_selectors):
|
|
92
92
|
continue
|
|
93
93
|
if len(author.sources) == 0:
|
|
94
|
-
author.auto_sources(dbs=list_of_objects(self.dbs, db_dict, default=default_dbs))
|
|
94
|
+
author.auto_sources(
|
|
95
|
+
dbs=list_of_objects(self.dbs, db_dict, default=default_dbs)
|
|
96
|
+
)
|
|
95
97
|
if author.sources:
|
|
96
98
|
self.authors[author.key] = author
|
|
97
99
|
if author.metadata.img is None:
|
|
@@ -152,9 +154,36 @@ class LabMap(MixInIO):
|
|
|
152
154
|
return None
|
|
153
155
|
|
|
154
156
|
def html(self, **kwargs):
|
|
157
|
+
"""
|
|
158
|
+
Generate HTML representation of the collaboration graph.
|
|
159
|
+
|
|
160
|
+
Parameters
|
|
161
|
+
----------
|
|
162
|
+
**kwargs
|
|
163
|
+
Passed to :func:`~gismap.gisgraphs.builder.make_vis`.
|
|
164
|
+
|
|
165
|
+
Returns
|
|
166
|
+
-------
|
|
167
|
+
:class:`str`
|
|
168
|
+
HTML content as a string.
|
|
169
|
+
"""
|
|
155
170
|
return make_vis(self, **kwargs)
|
|
156
171
|
|
|
157
172
|
def save_html(self, name=None, **kwargs):
|
|
173
|
+
"""
|
|
174
|
+
Save the collaboration graph as an HTML file.
|
|
175
|
+
|
|
176
|
+
Parameters
|
|
177
|
+
----------
|
|
178
|
+
name: :class:`str`, optional
|
|
179
|
+
Output filename. Defaults to lab name.
|
|
180
|
+
**kwargs
|
|
181
|
+
Passed to :meth:`html`.
|
|
182
|
+
|
|
183
|
+
Returns
|
|
184
|
+
-------
|
|
185
|
+
None
|
|
186
|
+
"""
|
|
158
187
|
if name is None:
|
|
159
188
|
name = self.name
|
|
160
189
|
name = Path(name).with_suffix(".html")
|
|
@@ -162,6 +191,18 @@ class LabMap(MixInIO):
|
|
|
162
191
|
f.write(self.html(**kwargs))
|
|
163
192
|
|
|
164
193
|
def show_html(self, **kwargs):
|
|
194
|
+
"""
|
|
195
|
+
Display the collaboration graph in a Jupyter notebook.
|
|
196
|
+
|
|
197
|
+
Parameters
|
|
198
|
+
----------
|
|
199
|
+
**kwargs
|
|
200
|
+
Passed to :meth:`html`.
|
|
201
|
+
|
|
202
|
+
Returns
|
|
203
|
+
-------
|
|
204
|
+
None
|
|
205
|
+
"""
|
|
165
206
|
display(HTML(self.html(**kwargs)))
|
|
166
207
|
|
|
167
208
|
|
|
@@ -174,9 +215,9 @@ class ListMap(LabMap):
|
|
|
174
215
|
author_list: :class:`list` of :class:`str`
|
|
175
216
|
List of authors names.
|
|
176
217
|
args: :class:`list`
|
|
177
|
-
Arguments to pass to the :class:`~gismap.lab.lab.Lab`
|
|
218
|
+
Arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
|
|
178
219
|
kwargs: :class:`dict`
|
|
179
|
-
Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab`
|
|
220
|
+
Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
|
|
180
221
|
"""
|
|
181
222
|
|
|
182
223
|
def __init__(self, author_list, *args, **kwargs):
|
gismap/lab_examples/cedric.py
CHANGED
|
@@ -10,37 +10,50 @@ class CedricMap(LabMap):
|
|
|
10
10
|
Class for handling a CNAM Cedric team from its name.
|
|
11
11
|
Default to `roc` team.
|
|
12
12
|
"""
|
|
13
|
+
|
|
13
14
|
name = "roc"
|
|
14
15
|
base_url = "https://cedric.cnam.fr"
|
|
15
16
|
|
|
16
17
|
def _author_iterator(self):
|
|
17
18
|
soup = Soup(get(f"{self.base_url}/equipes/{self.name}/"), features="lxml")
|
|
18
|
-
searchers = [
|
|
19
|
+
searchers = [
|
|
20
|
+
li.a
|
|
21
|
+
for ul in soup.find("div", {"id": "annuaire"})("ul")[:3]
|
|
22
|
+
for li in ul("li")
|
|
23
|
+
]
|
|
19
24
|
done = set()
|
|
20
25
|
for searcher in searchers:
|
|
21
|
-
name = searcher.text.split(
|
|
26
|
+
name = searcher.text.split("(")[0].strip()
|
|
22
27
|
if name in done:
|
|
23
28
|
continue
|
|
24
29
|
url = f"{self.base_url}{searcher['href']}"
|
|
25
30
|
sousoup = Soup(get(url), features="lxml")
|
|
26
|
-
img = sousoup.find(
|
|
31
|
+
img = sousoup.find("img", {"class": "photo"})["src"]
|
|
27
32
|
response = requests.head(img, allow_redirects=True)
|
|
28
33
|
if int(response.headers.get("Content-Length")) < 3000:
|
|
29
34
|
img = None
|
|
30
35
|
done.add(name)
|
|
31
|
-
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
|
|
36
|
+
yield LabAuthor(
|
|
37
|
+
name=name,
|
|
38
|
+
metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
|
|
39
|
+
)
|
|
32
40
|
|
|
33
41
|
|
|
34
42
|
class CedricFull(LabMap):
|
|
35
43
|
"""
|
|
36
44
|
Class for handling all CNAM Cedric teams using `https://cedric.cnam.fr/equipes` to get team names.
|
|
37
45
|
"""
|
|
46
|
+
|
|
38
47
|
name = "Cedric"
|
|
39
48
|
|
|
40
49
|
def _author_iterator(self):
|
|
41
50
|
base = "https://cedric.cnam.fr/equipes/"
|
|
42
|
-
soup =
|
|
43
|
-
teams = {
|
|
51
|
+
soup = Soup(get(base), features="lxml")
|
|
52
|
+
teams = {
|
|
53
|
+
a["href"].split("/")[-2]
|
|
54
|
+
for a in soup("a")
|
|
55
|
+
if base in a.get("href", "") and len(a["href"]) > len(base)
|
|
56
|
+
}
|
|
44
57
|
for team in teams:
|
|
45
58
|
for author in CedricMap(name=team)._author_iterator():
|
|
46
59
|
yield author
|
gismap/lab_examples/lamsade.py
CHANGED
|
@@ -18,8 +18,8 @@ def lamsade_parse(div):
|
|
|
18
18
|
:class:`tuple`
|
|
19
19
|
name, image url (or None), webpage (or None)
|
|
20
20
|
"""
|
|
21
|
-
img = div.img[
|
|
22
|
-
url = div.a[
|
|
21
|
+
img = div.img["src"] if div.img else None
|
|
22
|
+
url = div.a["href"] if div.a else None
|
|
23
23
|
name = div.h2.text.strip().title()
|
|
24
24
|
name = " ".join(name.split(" ", 1)[::-1])
|
|
25
25
|
return name, img, url
|
|
@@ -35,9 +35,11 @@ class Lamsade(LabMap):
|
|
|
35
35
|
directory = "fr/personnes/enseignants-chercheurs-et-chercheurs.html"
|
|
36
36
|
|
|
37
37
|
def _author_iterator(self):
|
|
38
|
-
soup = Soup(get(self.base_url+self.directory), features="lxml")
|
|
39
|
-
for a in soup(
|
|
38
|
+
soup = Soup(get(self.base_url + self.directory), features="lxml")
|
|
39
|
+
for a in soup("div", class_="dauphinecv-item"):
|
|
40
40
|
name, img, url = lamsade_parse(a)
|
|
41
|
-
img = self.base_url+img if img else None
|
|
42
|
-
url = self.base_url+url if url else None
|
|
43
|
-
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name))
|
|
41
|
+
img = self.base_url + img if img else None
|
|
42
|
+
url = self.base_url + url if url else None
|
|
43
|
+
yield LabAuthor(
|
|
44
|
+
name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name)
|
|
45
|
+
)
|
gismap/lab_examples/toulouse.py
CHANGED
|
@@ -29,18 +29,22 @@ class LaasMap(LabMap):
|
|
|
29
29
|
if "public_avatar" in a.img["class"]
|
|
30
30
|
else None
|
|
31
31
|
)
|
|
32
|
-
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
|
|
32
|
+
yield LabAuthor(
|
|
33
|
+
name=name,
|
|
34
|
+
metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
|
|
35
|
+
)
|
|
33
36
|
|
|
34
37
|
|
|
35
38
|
class LaasFull(LabMap):
|
|
36
39
|
"""
|
|
37
40
|
Class for handling all LAAS teams using `https://www.laas.fr/fr/equipes/` to get team names.
|
|
38
41
|
"""
|
|
42
|
+
|
|
39
43
|
name = "LAAS"
|
|
40
44
|
|
|
41
45
|
def _author_iterator(self):
|
|
42
46
|
soup = Soup(get("https://www.laas.fr/fr/equipes/"), features="lxml")
|
|
43
|
-
teams = [a[
|
|
47
|
+
teams = [a["href"].split("/")[-2] for a in soup("a", {"class": "badge"})]
|
|
44
48
|
for team in teams:
|
|
45
49
|
for author in LaasMap(name=team)._author_iterator():
|
|
46
50
|
yield author
|
gismap/search.py
CHANGED
|
@@ -6,7 +6,17 @@ from gismap.utils.text import reduce_keywords, Corrector
|
|
|
6
6
|
|
|
7
7
|
class SearchAction:
|
|
8
8
|
"""
|
|
9
|
-
|
|
9
|
+
Base class for extracting search results from a Gismo.
|
|
10
|
+
|
|
11
|
+
Subclasses should implement :meth:`process` to define
|
|
12
|
+
how to extract results from the gismo.
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
name : :class:`str`, optional
|
|
17
|
+
Name of this action (used as key in results dict).
|
|
18
|
+
post : callable, optional
|
|
19
|
+
Post-processing function applied to results.
|
|
10
20
|
"""
|
|
11
21
|
|
|
12
22
|
def __init__(self, name=None, post=None):
|
|
@@ -14,9 +24,33 @@ class SearchAction:
|
|
|
14
24
|
self.post = (lambda x: x) if post is None else post
|
|
15
25
|
|
|
16
26
|
def process(self, gismo):
|
|
27
|
+
"""
|
|
28
|
+
Extract results from the gismo. Must be implemented by subclasses.
|
|
29
|
+
|
|
30
|
+
Parameters
|
|
31
|
+
----------
|
|
32
|
+
gismo : :class:`~gismo.gismo.Gismo`
|
|
33
|
+
The gismo to query.
|
|
34
|
+
|
|
35
|
+
Returns
|
|
36
|
+
-------
|
|
37
|
+
Results (type depends on subclass).
|
|
38
|
+
"""
|
|
17
39
|
raise NotImplementedError
|
|
18
40
|
|
|
19
41
|
def run(self, gismo):
|
|
42
|
+
"""
|
|
43
|
+
Execute the action and apply post-processing.
|
|
44
|
+
|
|
45
|
+
Parameters
|
|
46
|
+
----------
|
|
47
|
+
gismo : :class:`~gismo.gismo.Gismo`
|
|
48
|
+
The gismo to query.
|
|
49
|
+
|
|
50
|
+
Returns
|
|
51
|
+
-------
|
|
52
|
+
Post-processed results.
|
|
53
|
+
"""
|
|
20
54
|
return self.post(self.process(gismo))
|
|
21
55
|
|
|
22
56
|
|
|
@@ -153,6 +187,19 @@ publi_template = Template("""
|
|
|
153
187
|
|
|
154
188
|
|
|
155
189
|
def publi_to_html(publi):
|
|
190
|
+
"""
|
|
191
|
+
Convert a publication to an HTML list item.
|
|
192
|
+
|
|
193
|
+
Parameters
|
|
194
|
+
----------
|
|
195
|
+
publi : :class:`~gismap.sources.models.Publication`
|
|
196
|
+
Publication to convert.
|
|
197
|
+
|
|
198
|
+
Returns
|
|
199
|
+
-------
|
|
200
|
+
:class:`str`
|
|
201
|
+
HTML list item string.
|
|
202
|
+
"""
|
|
156
203
|
dico = dict()
|
|
157
204
|
for db in ["hal", "dblp"]:
|
|
158
205
|
source = publi.sources.get(db)
|
|
@@ -167,6 +214,19 @@ def publi_to_html(publi):
|
|
|
167
214
|
|
|
168
215
|
|
|
169
216
|
def publis_to_html(publis):
|
|
217
|
+
"""
|
|
218
|
+
Convert a list of publications to an HTML unordered list.
|
|
219
|
+
|
|
220
|
+
Parameters
|
|
221
|
+
----------
|
|
222
|
+
publis : :class:`list`
|
|
223
|
+
List of publications.
|
|
224
|
+
|
|
225
|
+
Returns
|
|
226
|
+
-------
|
|
227
|
+
:class:`str`
|
|
228
|
+
HTML unordered list string.
|
|
229
|
+
"""
|
|
170
230
|
rows = "\n".join(publi_to_html(p) for p in publis)
|
|
171
231
|
return f"<ul>\n{rows}\n</ul>"
|
|
172
232
|
|
gismap/sources/dblp.py
CHANGED
|
@@ -98,6 +98,7 @@ class DBLPAuthor(Author, DBLP):
|
|
|
98
98
|
key='conf/sss/Mathieu07')
|
|
99
99
|
|
|
100
100
|
"""
|
|
101
|
+
|
|
101
102
|
key: str
|
|
102
103
|
aliases: list = field(default_factory=list)
|
|
103
104
|
|
|
@@ -124,6 +125,27 @@ DBLP_TYPES = {
|
|
|
124
125
|
|
|
125
126
|
@dataclass(repr=False)
|
|
126
127
|
class DBLPPublication(Publication, DBLP):
|
|
128
|
+
"""
|
|
129
|
+
Publication from the DBLP database.
|
|
130
|
+
|
|
131
|
+
Parameters
|
|
132
|
+
----------
|
|
133
|
+
title: :class:`str`
|
|
134
|
+
Publication title.
|
|
135
|
+
authors: :class:`list`
|
|
136
|
+
List of :class:`DBLPAuthor` objects.
|
|
137
|
+
venue: :class:`str`
|
|
138
|
+
Publication venue.
|
|
139
|
+
type: :class:`str`
|
|
140
|
+
Publication type.
|
|
141
|
+
year: :class:`int`
|
|
142
|
+
Publication year.
|
|
143
|
+
key: :class:`str`
|
|
144
|
+
DBLP record key.
|
|
145
|
+
metadata: :class:`dict`
|
|
146
|
+
Additional metadata (pages, volume, etc.).
|
|
147
|
+
"""
|
|
148
|
+
|
|
127
149
|
key: str
|
|
128
150
|
metadata: dict = field(default_factory=dict)
|
|
129
151
|
|