gismap 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/__init__.py +2 -0
- gismap/lab/__init__.py +2 -2
- gismap/lab/egomap.py +42 -0
- gismap/lab/expansion.py +240 -0
- gismap/lab/filters.py +18 -0
- gismap/lab/graph.py +11 -8
- gismap/lab/lab.py +86 -70
- gismap/lab/lab_author.py +84 -0
- gismap/lab/lip6.py +2 -1
- gismap/lab/toulouse.py +2 -1
- gismap/lab/vis.py +183 -25
- gismap/sources/dblp.py +26 -22
- gismap/sources/hal.py +47 -11
- gismap/sources/multi.py +68 -31
- gismap/utils/common.py +47 -2
- gismap/utils/requests.py +24 -11
- gismap/utils/text.py +66 -1
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/METADATA +6 -4
- gismap-0.2.1.dist-info/RECORD +29 -0
- gismap-0.1.0.dist-info/RECORD +0 -25
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/WHEEL +0 -0
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/licenses/AUTHORS.md +0 -0
gismap/__init__.py
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
from importlib.metadata import metadata
|
|
4
4
|
|
|
5
|
+
from gismap.sources.hal import HAL as HAL, HALAuthor as HALAuthor
|
|
6
|
+
from gismap.sources.dblp import DBLP as DBLP, DBLPAuthor as DBLPAuthor
|
|
5
7
|
from gismap.utils.common import get_classes as get_classes
|
|
6
8
|
from gismap.gismo import make_gismo as make_gismo
|
|
7
9
|
from gismap.search import (
|
gismap/lab/__init__.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from gismap.lab.lab import (
|
|
2
2
|
Lab as Lab,
|
|
3
3
|
ListLab as ListLab,
|
|
4
|
-
LabAuthor as LabAuthor,
|
|
5
|
-
AuthorMetadata as AuthorMetadata,
|
|
6
4
|
)
|
|
7
5
|
from gismap.lab.lip6 import Lip6 as Lip6, Lip6Lab as Lip6Lab
|
|
8
6
|
from gismap.lab.toulouse import Solace as Solace, LaasLab as LaasLab
|
|
9
7
|
from gismap.lab.graph import lab2graph as lab2graph
|
|
10
8
|
from gismap.lab.vis import generate_html as generate_html
|
|
9
|
+
from gismap.lab.egomap import EgoMap as EgoMap
|
|
10
|
+
from gismap.lab.lab_author import LabAuthor as LabAuthor
|
gismap/lab/egomap.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from gismap.lab.lab import Lab
|
|
2
|
+
from gismap.lab.lab_author import LabAuthor
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class EgoMap(Lab):
    """
    Lab centered on a single author (the *sun*) and grown from her co-authors.

    Parameters
    ----------
    sun: :class:`str` or :class:`~gismap.lab.lab_author.LabAuthor`
        Central author. A string is wrapped into a :class:`~gismap.lab.lab_author.LabAuthor`.
    args
        Positional arguments forwarded to :class:`~gismap.lab.lab.Lab`.
    kwargs
        Keyword arguments forwarded to :class:`~gismap.lab.lab.Lab`.

    Examples
    --------

    >>> dang = EgoMap("The-Dang Huynh", dbs="hal")
    >>> dang.build(target=10)
    >>> sorted(a.name for a in dang.authors.values())  # doctest: +NORMALIZE_WHITESPACE
    ['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu', 'François Baccelli', 'Laurent Viennot', 'Ludovic Noirie',
    'Siu-Wai Ho', 'Sébastien Tixeuil', 'The-Dang Huynh', 'Yannick Carlinet']
    """

    def __init__(self, sun, *args, **kwargs):
        if isinstance(sun, str):
            sun = LabAuthor(sun)
        # The sun is pinned at the origin of the map.
        sun.metadata.position = (0, 0)
        self.sun = sun
        super().__init__(*args, **kwargs)

    def _author_iterator(self):
        """Yield the only seed author of the lab: the sun."""
        yield self.sun

    def build(self, **kwargs):
        """
        Fetch the sun, then expand the lab in two rounds until `target` authors are reached.

        Parameters
        ----------
        kwargs
            `target` (default 50) is the wanted total number of authors and
            `group` (default ``"moon"``) is the group given to authors of the second round.
            Remaining entries are forwarded to :meth:`~gismap.lab.lab.Lab.expand`.

        Returns
        -------
        None
        """
        target = kwargs.pop("target", 50)
        group = kwargs.pop("group", "moon")
        self.update_authors(desc="Sun metadata")
        self.update_publis(desc="Sun publications")

        # First round ("planets"): no group is assigned.
        # A non-positive budget must not reach expand: a negative bound would be used
        # as a slice end and keep all candidates but the last ones.
        remaining = target - len(self.authors)
        if remaining > 0:
            kwargs["target"] = remaining
            self.expand(group=None, desc="Planets", **kwargs)

        # Second round ("moons"): only if the first round left some room.
        remaining = target - len(self.authors)
        kwargs.update({"target": remaining, "group": group})
        if remaining > 0:
            self.expand(desc="Moons", **kwargs)
|
gismap/lab/expansion.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from collections import Counter, defaultdict
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from bof.fuzz import Process
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from gismap.utils.text import normalized_name
|
|
7
|
+
from gismap.sources.multi import sort_author_sources
|
|
8
|
+
from gismap.lab.lab_author import LabAuthor
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(order=True)
class ProspectStrength:
    """
    Measures the interaction between an external author and a lab by counting co-authors and publications.

    A (max,+) addition is handled to deal with multiple keys:
    co-author counts are combined with `max`, publication counts are summed.

    Instances are ordered like the tuple ``(coauthors, publications)``.

    Examples
    --------

    >>> a1 = ProspectStrength(3, 5)
    >>> a2 = ProspectStrength(2, 10)
    >>> a1 > a2
    True
    >>> a1 >= a2
    True
    >>> a1 + a2
    ProspectStrength(coauthors=3, publications=15)
    >>> sum([a1, a2])
    ProspectStrength(coauthors=3, publications=15)
    """

    # Field order matters: it defines the ordering generated by ``order=True``.
    coauthors: int
    publications: int

    def __call__(self):
        """Return the strength as a ``(coauthors, publications)`` tuple."""
        return self.coauthors, self.publications

    def __add__(self, other):
        if isinstance(other, ProspectStrength):
            return ProspectStrength(
                coauthors=max(self.coauthors, other.coauthors),
                publications=self.publications + other.publications,
            )
        # 0 is accepted as neutral element so that the builtin sum() works.
        if other == 0:
            return self
        return NotImplemented

    def __radd__(self, other):
        # The (max,+) addition is commutative.
        return self.__add__(other)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def count_prospect_entries(lab):
    """
    Compute, for each external coauthor (prospect), its strength with respect to the lab.

    An author is considered external when it has a `db_name` attribute.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Reference lab.

    Returns
    -------
    :class:`dict` of :class:`str` to :class:`~gismap.lab.expansion.ProspectStrength`
        Lab strengths, indexed by prospect key.
    """
    externals_by_member = defaultdict(set)
    publication_tally = Counter()
    for publication in lab.publications.values():
        for source in publication.sources:
            outsiders = [a.key for a in source.authors if hasattr(a, "db_name")]
            insiders = {a.key for a in source.authors if not hasattr(a, "db_name")}
            # One entry per (source, external author) occurrence.
            publication_tally.update(outsiders)
            for member_key in insiders:
                externals_by_member[member_key].update(outsiders)

    # Number of distinct lab members each external author has written with.
    coauthor_tally = Counter()
    for outsider_keys in externals_by_member.values():
        coauthor_tally.update(outsider_keys)

    strengths = dict()
    for key, n_publications in publication_tally.items():
        strengths[key] = ProspectStrength(
            coauthors=coauthor_tally[key], publications=n_publications
        )
    return strengths
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class Prospect:
    """
    Candidate for integration to lab.

    Parameters
    ----------
    author: :class:`~gismap.sources.models.Author`
        Reference author. Must have a key.
    strengths: :class:`dict`
        Dictionary of ProspectStrength, indexed by author key.

    Attributes
    ----------
    name: :class:`str`
        Normalized name of the author.
    author: :class:`~gismap.sources.models.Author`
        Reference author.
    score: :class:`~gismap.lab.expansion.ProspectStrength`
        Strength of the author, looked up by key in `strengths`.
    """

    def __init__(self, author, strengths):
        self.name = normalized_name(author.name)
        self.author = author
        self.score = strengths[author.key]

    def __lt__(self, other):
        # Prospects are ordered by their strength.
        return self.score < other.score

    def __repr__(self):
        # The closing parenthesis was missing from the representation.
        return f"Prospect({self.name}, key={self.author.key}, s={self.score()})"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def get_prospects(lab):
    """
    Collect the external authors of the lab publications that pass all author selectors.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Reference lab.

    Returns
    -------
    :class:`list` of :class:`~gismap.lab.expansion.Prospect`
        List of prospects, one per distinct author key.
    """
    strengths = count_prospect_entries(lab)
    candidates = dict()
    for publication in lab.publications.values():
        for source in publication.sources:
            for author in source.authors:
                # Only authors with a db_name attribute are considered external.
                if not hasattr(author, "db_name"):
                    continue
                if all(accept(author) for accept in lab.author_selectors):
                    candidates[author.key] = author
    return [Prospect(author, strengths) for author in candidates.values()]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@dataclass
class Member:
    """
    Minimal description of a member: a name and a key.

    Attributes
    ----------
    name: :class:`str`
        Name of the member.
    key: :class:`str`
        Key of the member.
    """

    name: str
    key: str
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_member_names(lab):
    """
    List every normalized spelling (name and aliases) of the lab authors.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Reference lab.

    Returns
    -------
    :class:`list`
        Tuples (normalized name, author key). An author appears once per distinct spelling.
    """
    pairs = []
    for key, author in lab.authors.items():
        # A set removes spellings that collapse to the same normalized form.
        spellings = {normalized_name(n) for n in [author.name, *author.aliases]}
        pairs.extend((spelling, key) for spelling in spellings)
    return pairs
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def trim_sources(author):
    """
    Reduce the sources of an author in place, keeping the first source met for each db.

    Parameters
    ----------
    author: :class:`~gismap.sources.multi.SourcedAuthor`
        An author.

    Returns
    -------
    None
    """
    first_by_db = dict()
    for source in author.sources:
        # setdefault keeps the earliest source of each db; dicts preserve insertion order.
        first_by_db.setdefault(source.db_name, source)
    author.sources = list(first_by_db.values())
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def proper_prospects(
    lab, length_impact=0.05, threshold=80, n_range=4, max_new=None, trim=True
):
    """
    Select the best external coauthors of a lab and build the author objects to add.

    Prospects whose name matches a current member are re-attached to that member.
    The remaining prospects are grouped by name similarity and ranked by strength.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Reference lab.
    length_impact: :class:`float`, default=0.05
        Passed to :class:`bof.fuzz.Process`.
    threshold: :class:`int`, default=80
        Two names are considered the same when their similarity exceeds this value.
    n_range: :class:`int`, default=4
        Passed to :class:`bof.fuzz.Process`.
    max_new: :class:`int`, optional
        Maximal number of new authors. `None` means no limit; negative values are treated as 0.
    trim: :class:`bool`, default=True
        Keep only one source per db for the new authors.

    Returns
    -------
    existing: :class:`dict`
        Prospect key -> key of the lab member it matches.
    new_rosetta: :class:`dict`
        Source key -> new :class:`~gismap.lab.lab_author.LabAuthor`.
    """
    member_names = get_member_names(lab)
    prospects = get_prospects(lab)

    if len(member_names) == 0 or len(prospects) == 0:
        return dict(), dict()

    # A negative bound would be interpreted by the final slice as "all but the last ones".
    if max_new is not None and max_new < 0:
        max_new = 0

    done = np.zeros(len(prospects), dtype=bool)

    # Compare current and prospects names to re-attach ghosts
    p = Process(length_impact=length_impact, n_range=n_range)
    p.allow_updates = False
    p.fit([n[0] for n in member_names])
    jc = p.transform([prospect.name for prospect in prospects])
    best_choice = np.argmax(jc, axis=1)
    existing = dict()
    for i, j in enumerate(best_choice):
        if jc[i, j] > threshold:
            existing[prospects[i].author.key] = member_names[j][1]
            done[i] = True

    # Regroup remaining prospects
    p.reset()
    names = [prospect.name for prospect in prospects]
    p.fit(names)
    jc = p.transform(names)
    new_lab = []
    for i, prospect in enumerate(prospects):
        if done[i]:
            continue
        # All pending prospects whose name is close enough to the current one.
        locs = [j for j in np.where(jc[i, :] > threshold)[0] if not done[j]]
        done[locs] = True
        sources = sort_author_sources([prospects[j].author for j in locs])
        strength = sum(prospects[j].score for j in locs)
        new_author = LabAuthor.from_sources(sources)
        new_lab.append((strength, new_author))

    # Extract top prospects
    ranked = sorted(new_lab, key=lambda entry: entry[0], reverse=True)
    new_lab = [entry[1] for entry in ranked][:max_new]
    # Built before trimming, so the keys of trimmed sources stay mapped to their author.
    new_rosetta = {s.key: a for a in new_lab for s in a.sources}

    # Remove extra sources
    if trim:
        for a in new_lab:
            trim_sources(a)

    return existing, new_rosetta
|
gismap/lab/filters.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
def taboos(txt, words):
    """
    Tell if a text is free of forbidden words.

    Parameters
    ----------
    txt: :class:`str`
        Text to check.
    words: :class:`str` or :class:`list`
        Forbidden word, or collection of forbidden words.

    Returns
    -------
    :class:`bool`
        True if no forbidden word is contained in the text.
    """
    forbidden = (words,) if isinstance(words, str) else words
    for word in forbidden:
        if word in txt:
            return False
    return True
|
|
6
|
+
|
|
7
|
+
def publication_size_filter(n_max=10):
    """
    Build a publication selector based on the number of authors.

    Parameters
    ----------
    n_max: :class:`int`, default=10
        Publications with `n_max` authors or more are rejected.

    Returns
    -------
    callable
        Selector that returns True for publications with strictly less than `n_max` authors.
    """

    def keep(p):
        return n_max > len(p.authors)

    return keep
|
|
9
|
+
|
|
10
|
+
def publication_taboo_filter(w=None):
    """
    Build a publication selector that rejects titles containing forbidden words.

    Parameters
    ----------
    w: :class:`str` or :class:`list`, optional
        Forbidden word(s). Default: "Editorial" and "Foreword".

    Returns
    -------
    callable
        Selector that returns True for publications whose title has no forbidden word.
    """
    forbidden = ["Editorial", "Foreword"] if w is None else w

    def keep(p):
        return taboos(p.title, forbidden)

    return keep
|
|
14
|
+
|
|
15
|
+
def author_taboo_filter(w=None):
    """
    Build an author selector that rejects names containing forbidden words.

    Parameters
    ----------
    w: :class:`str` or :class:`list`, optional
        Forbidden word(s). Default: "Buob" and "Kofman".

    Returns
    -------
    callable
        Selector that returns True for authors whose name has no forbidden word.
    """
    forbidden = ["Buob", "Kofman"] if w is None else w

    def keep(a):
        return taboos(a.name, forbidden)

    return keep
|
gismap/lab/graph.py
CHANGED
|
@@ -59,8 +59,11 @@ def publication_to_html(pub):
|
|
|
59
59
|
HTML string with hyperlinks where applicable.
|
|
60
60
|
"""
|
|
61
61
|
# Title as link if available
|
|
62
|
-
|
|
63
|
-
|
|
62
|
+
url = getattr(pub, "url", None)
|
|
63
|
+
if url is None and hasattr(pub, "sources"):
|
|
64
|
+
url = getattr(pub.sources[0], "url", None)
|
|
65
|
+
if url:
|
|
66
|
+
title_html = f'<a href="{url}" target="_blank">{pub.title}</a>'
|
|
64
67
|
else:
|
|
65
68
|
title_html = pub.title
|
|
66
69
|
|
|
@@ -129,7 +132,7 @@ def to_node(s, node_pubs):
|
|
|
129
132
|
"""
|
|
130
133
|
Parameters
|
|
131
134
|
----------
|
|
132
|
-
s: :class:`~gismap.lab.
|
|
135
|
+
s: :class:`~gismap.lab.lab_author.LabAuthor`
|
|
133
136
|
Searcher.
|
|
134
137
|
node_pubs: :class:`dict`
|
|
135
138
|
Lab publications.
|
|
@@ -203,17 +206,17 @@ def lab2graph(lab):
|
|
|
203
206
|
>>> lab.update_publis()
|
|
204
207
|
>>> len(lab.authors)
|
|
205
208
|
2
|
|
206
|
-
>>> len(lab.publications)
|
|
207
|
-
|
|
209
|
+
>>> 400 < len(lab.publications) < 440
|
|
210
|
+
True
|
|
208
211
|
>>> html = lab2graph(lab)
|
|
209
212
|
>>> html[:80] # doctest: +ELLIPSIS
|
|
210
|
-
'\\n<div
|
|
213
|
+
'\\n<div class="gismap-content">\\n<div id="mynetwork_..."></div>\\n<a\\n href="htt'
|
|
211
214
|
"""
|
|
212
215
|
node_pubs = {k: [] for k in lab.authors}
|
|
213
216
|
edges_dict = defaultdict(list)
|
|
214
217
|
for p in lab.publications.values():
|
|
215
|
-
#
|
|
216
|
-
lauths = {a.key: a for
|
|
218
|
+
# Strange things can happen with multiple sources. This should take care of it.
|
|
219
|
+
lauths = {a.key: a for source in p.sources for a in source.authors if a.__class__.__name__ == "LabAuthor"}
|
|
217
220
|
lauths = sorted([a for a in lauths.values()], key=lambda a: str(a.key))
|
|
218
221
|
for a in lauths:
|
|
219
222
|
node_pubs[a.key].append(p)
|
gismap/lab/lab.py
CHANGED
|
@@ -1,67 +1,24 @@
|
|
|
1
1
|
from gismo import MixInIO
|
|
2
|
-
from
|
|
2
|
+
from tqdm.auto import tqdm
|
|
3
|
+
from IPython.display import display, HTML
|
|
4
|
+
from pathlib import Path
|
|
3
5
|
|
|
4
|
-
from gismap.utils.common import
|
|
6
|
+
from gismap.utils.common import list_of_objects
|
|
5
7
|
from gismap.utils.logger import logger
|
|
6
|
-
from gismap.
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
url: :class:`str`
|
|
21
|
-
Homepage of the author.
|
|
22
|
-
img: :class:`str`
|
|
23
|
-
Url to a picture.
|
|
24
|
-
group: :class:`str`
|
|
25
|
-
Group of the author.
|
|
26
|
-
position: :class:`tuple`
|
|
27
|
-
Coordinates of the author.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
url: str = None
|
|
31
|
-
img: str = None
|
|
32
|
-
group: str = None
|
|
33
|
-
position: tuple = None
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass(repr=False)
|
|
37
|
-
class LabAuthor(SourcedAuthor):
|
|
38
|
-
metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
|
|
39
|
-
|
|
40
|
-
def auto_sources(self, dbs=None):
|
|
41
|
-
"""
|
|
42
|
-
Automatically populate the sources based on author's name.
|
|
43
|
-
|
|
44
|
-
Parameters
|
|
45
|
-
----------
|
|
46
|
-
dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
|
|
47
|
-
List of DB sources to use.
|
|
48
|
-
|
|
49
|
-
Returns
|
|
50
|
-
-------
|
|
51
|
-
None
|
|
52
|
-
"""
|
|
53
|
-
if dbs is None:
|
|
54
|
-
dbs = [HAL, DBLP]
|
|
55
|
-
sources = []
|
|
56
|
-
for db in dbs:
|
|
57
|
-
source = db.search_author(self.name)
|
|
58
|
-
if len(source) == 0:
|
|
59
|
-
logger.warning(f"{self.name} not found in {db.db_name}")
|
|
60
|
-
elif len(source) > 1:
|
|
61
|
-
logger.warning(f"Multiple entries for {self.name} in {db.db_name}")
|
|
62
|
-
sources += source
|
|
63
|
-
if len(sources) > 0:
|
|
64
|
-
self.sources = sources
|
|
8
|
+
from gismap.lab.lab_author import (
|
|
9
|
+
db_dict,
|
|
10
|
+
default_dbs,
|
|
11
|
+
AuthorMetadata,
|
|
12
|
+
LabAuthor,
|
|
13
|
+
labify_publications,
|
|
14
|
+
)
|
|
15
|
+
from gismap.sources.multi import (
|
|
16
|
+
regroup_authors,
|
|
17
|
+
regroup_publications,
|
|
18
|
+
)
|
|
19
|
+
from gismap.lab.expansion import proper_prospects
|
|
20
|
+
from gismap.lab.filters import author_taboo_filter, publication_taboo_filter, publication_size_filter
|
|
21
|
+
from gismap.lab.graph import lab2graph
|
|
65
22
|
|
|
66
23
|
|
|
67
24
|
class Lab(MixInIO):
|
|
@@ -76,16 +33,23 @@ class Lab(MixInIO):
|
|
|
76
33
|
Name of the lab. Can be set as class or instance attribute.
|
|
77
34
|
dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
|
|
78
35
|
List of DB sources to use.
|
|
36
|
+
author_selectors: :class:`list`
|
|
37
|
+
Author filter. Default: minimal filtering.
|
|
38
|
+
publication_selectors: :class:`list`
|
|
39
|
+
Publication filter. Default: less than 10 authors to remove black holes.
|
|
79
40
|
"""
|
|
80
41
|
|
|
81
42
|
name = None
|
|
82
|
-
dbs =
|
|
43
|
+
dbs = default_dbs
|
|
83
44
|
|
|
84
|
-
def __init__(
|
|
45
|
+
def __init__(
|
|
46
|
+
self, name=None, dbs=None):
|
|
85
47
|
if name is not None:
|
|
86
48
|
self.name = name
|
|
87
49
|
if dbs is not None:
|
|
88
|
-
self.dbs = dbs
|
|
50
|
+
self.dbs = list_of_objects(dbs, db_dict, default=default_dbs)
|
|
51
|
+
self.author_selectors = [author_taboo_filter()]
|
|
52
|
+
self.publication_selectors = [publication_size_filter(), publication_taboo_filter()]
|
|
89
53
|
self.authors = None
|
|
90
54
|
self.publications = None
|
|
91
55
|
|
|
@@ -100,7 +64,7 @@ class Lab(MixInIO):
|
|
|
100
64
|
"""
|
|
101
65
|
raise NotImplementedError
|
|
102
66
|
|
|
103
|
-
def update_authors(self):
|
|
67
|
+
def update_authors(self, desc="Author information"):
|
|
104
68
|
"""
|
|
105
69
|
Populate the authors attribute (:class:`dict` [:class:`str`, :class:`~gismap.lab.lab.LabAuthor`]).
|
|
106
70
|
|
|
@@ -109,12 +73,17 @@ class Lab(MixInIO):
|
|
|
109
73
|
None
|
|
110
74
|
"""
|
|
111
75
|
self.authors = dict()
|
|
112
|
-
for author in self._author_iterator():
|
|
113
|
-
author
|
|
76
|
+
for author in tqdm(self._author_iterator(), desc=desc):
|
|
77
|
+
if not all(f(author) for f in self.author_selectors):
|
|
78
|
+
continue
|
|
79
|
+
if len(author.sources) == 0:
|
|
80
|
+
author.auto_sources(dbs=self.dbs)
|
|
114
81
|
if author.sources:
|
|
115
82
|
self.authors[author.key] = author
|
|
83
|
+
if author.metadata.img is None:
|
|
84
|
+
author.auto_img()
|
|
116
85
|
|
|
117
|
-
def update_publis(self):
|
|
86
|
+
def update_publis(self, desc="Publications information"):
|
|
118
87
|
"""
|
|
119
88
|
Populate the publications attribute (:class:`dict` [:class:`str`, :class:`~gismap.sources.multi.SourcedPublication`]).
|
|
120
89
|
|
|
@@ -123,11 +92,58 @@ class Lab(MixInIO):
|
|
|
123
92
|
None
|
|
124
93
|
"""
|
|
125
94
|
pubs = dict()
|
|
126
|
-
for author in self.authors.values():
|
|
127
|
-
pubs.update(
|
|
95
|
+
for author in tqdm(self.authors.values(), desc=desc):
|
|
96
|
+
pubs.update(
|
|
97
|
+
author.get_publications(clean=False, selector=self.publication_selectors)
|
|
98
|
+
)
|
|
128
99
|
regroup_authors(self.authors, pubs)
|
|
129
100
|
self.publications = regroup_publications(pubs)
|
|
130
101
|
|
|
102
|
+
def expand(self, target=None, group="moon", desc="Moon information", **kwargs):
    """
    Add to the lab its best external coauthors, along with their publications.

    Parameters
    ----------
    target: :class:`int`, optional
        Maximal number of authors to add. Default: a third of the current number of authors.
    group: :class:`str`, default="moon"
        Group assigned to the new authors.
    desc: :class:`str`
        Label of the progress bar.
    kwargs
        Forwarded to :func:`~gismap.lab.expansion.proper_prospects`.

    Returns
    -------
    None
    """
    if target is None:
        target = len(self.authors) // 3
    old, rosetta = proper_prospects(self, max_new=target, **kwargs)
    new = {a.key: a for a in rosetta.values()}
    # External keys that match a current member are redirected to that member.
    rosetta.update({k: self.authors[v] for k, v in old.items()})
    logger.debug(f"{len(new)} new authors selected")
    if not new:
        logger.warning("Expansion failed: no new author found.")
        return

    self.authors.update(new)

    pubs = dict()
    for author in tqdm(new.values(), desc=desc):
        author.auto_img()
        author.metadata.group = group
        fetched = author.get_publications(
            clean=False, selector=self.publication_selectors
        )
        pubs.update(fetched)

    # Sources of the publications already in the lab are pooled with the new ones.
    pubs.update(
        {source.key: source for pub in self.publications.values() for source in pub.sources}
    )

    labify_publications(pubs.values(), rosetta)

    self.publications = regroup_publications(pubs)
|
|
133
|
+
|
|
134
|
+
def html(self, **kwargs):
    """
    Build the HTML representation of the lab.

    Parameters
    ----------
    kwargs
        Forwarded to :func:`~gismap.lab.graph.lab2graph`.

    Returns
    -------
    Result of :func:`~gismap.lab.graph.lab2graph` for this lab.
    """
    rendering = lab2graph(self, **kwargs)
    return rendering
|
|
136
|
+
|
|
137
|
+
def save_html(self, name=None, **kwargs):
    """
    Write the HTML representation of the lab to a file.

    Parameters
    ----------
    name: :class:`str`, optional
        File name. Its suffix is replaced by ``.html``. Default: name of the lab.
    kwargs
        Forwarded to the `html` method.

    Returns
    -------
    None
    """
    target = Path(self.name if name is None else name).with_suffix(".html")
    with open(target, "wt", encoding="utf8") as out:
        out.write(self.html(**kwargs))
|
|
143
|
+
|
|
144
|
+
def show_html(self, **kwargs):
    """
    Display the HTML representation of the lab with IPython.

    Parameters
    ----------
    kwargs
        Forwarded to the `html` method.

    Returns
    -------
    None
    """
    content = self.html(**kwargs)
    display(HTML(content))
|
|
146
|
+
|
|
131
147
|
|
|
132
148
|
class ListLab(Lab):
|
|
133
149
|
"""
|
gismap/lab/lab_author.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from dataclasses import dataclass, field
|
|
2
|
+
|
|
3
|
+
from gismap import get_classes, HAL, DBLP
|
|
4
|
+
from gismap.sources.models import DB
|
|
5
|
+
from gismap.sources.multi import SourcedAuthor, sort_author_sources
|
|
6
|
+
from gismap.utils.common import LazyRepr, list_of_objects
|
|
7
|
+
from gismap.utils.logger import logger
|
|
8
|
+
|
|
9
|
+
db_dict = get_classes(DB, key="db_name")
|
|
10
|
+
default_dbs = [HAL, DBLP]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(repr=False)
class AuthorMetadata(LazyRepr):
    """
    Optional presentation details attached to an author. Every field defaults to None.

    Attributes
    ----------

    url: :class:`str`
        Homepage of the author.
    img: :class:`str`
        Url to a picture.
    group: :class:`str`
        Group of the author.
    position: :class:`tuple`
        Coordinates of the author.
    """

    url: str = None
    img: str = None
    group: str = None
    position: tuple = None
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(repr=False)
class LabAuthor(SourcedAuthor):
    """
    Sourced author enriched with presentation metadata.

    Attributes
    ----------
    metadata: :class:`~gismap.lab.lab_author.AuthorMetadata`
        Presentation details of the author. A fresh, empty object by default.
    """

    metadata: AuthorMetadata = field(default_factory=AuthorMetadata)

    def auto_img(self):
        """
        Set the picture of the author from the first source that provides one.

        The metadata is left untouched if no source has a (non-None) `img` attribute.

        Returns
        -------
        None
        """
        pictures = (getattr(source, "img", None) for source in self.sources)
        picture = next((img for img in pictures if img is not None), None)
        if picture is not None:
            self.metadata.img = picture

    def auto_sources(self, dbs=None):
        """
        Look up the author's name in the given DBs and use the results as sources.

        The current sources are kept if nothing is found.

        Parameters
        ----------
        dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
            List of DB sources to use.

        Returns
        -------
        None
        """
        found = []
        for db in list_of_objects(dbs, db_dict, default=default_dbs):
            hits = db.search_author(self.name)
            n_hits = len(hits)
            if n_hits == 0:
                logger.warning(f"{self.name} not found in {db.db_name}")
            elif n_hits > 1:
                logger.warning(f"Multiple entries for {self.name} in {db.db_name}")
            found += hits
        if found:
            self.sources = sort_author_sources(found)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def labify_author(author, rosetta):
    """
    Replace an author by its lab counterpart when there is one.

    Parameters
    ----------
    author
        Author to convert. Needs a key unless it is already a LabAuthor.
    rosetta: :class:`dict`
        Author key -> :class:`~gismap.lab.lab_author.LabAuthor`.

    Returns
    -------
    The author itself if it is a LabAuthor or has no entry in `rosetta`, its `rosetta` entry otherwise.
    """
    already_in_lab = isinstance(author, LabAuthor)
    return author if already_in_lab else rosetta.get(author.key, author)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def labify_publications(pubs, rosetta):
    """
    Replace in place the authors of publications by their lab counterparts.

    Parameters
    ----------
    pubs
        Iterable of publications. Publications with a `sources` attribute have the
        authors of each source converted as well.
    rosetta: :class:`dict`
        Author key -> :class:`~gismap.lab.lab_author.LabAuthor`.

    Returns
    -------
    None
    """
    for pub in pubs:
        pub.authors = [labify_author(a, rosetta) for a in pub.authors]
        for source in getattr(pub, "sources", []):
            # Each source keeps its own author list: it used to be overwritten
            # with the authors of the enclosing publication.
            source.authors = [labify_author(a, rosetta) for a in source.authors]
|
gismap/lab/lip6.py
CHANGED
gismap/lab/toulouse.py
CHANGED