gismap 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/__init__.py +1 -0
- gismap/gisgraphs/__init__.py +0 -0
- gismap/gisgraphs/builder.py +105 -0
- gismap/{lab → gisgraphs}/graph.py +79 -69
- gismap/gisgraphs/groups.py +70 -0
- gismap/gisgraphs/js.py +190 -0
- gismap/gisgraphs/options.py +37 -0
- gismap/gisgraphs/style.py +119 -0
- gismap/gisgraphs/widget.py +145 -0
- gismap/gismo.py +2 -2
- gismap/lab/__init__.py +3 -7
- gismap/lab/egomap.py +17 -18
- gismap/lab/expansion.py +10 -9
- gismap/lab/filters.py +84 -9
- gismap/lab/lab_author.py +51 -4
- gismap/lab/{lab.py → labmap.py} +33 -12
- gismap/lab_examples/__init__.py +0 -0
- gismap/lab_examples/cedric.py +46 -0
- gismap/lab_examples/lincs.py +44 -0
- gismap/{lab → lab_examples}/lip6.py +8 -4
- gismap/{lab → lab_examples}/toulouse.py +23 -6
- gismap/sources/dblp.py +15 -17
- gismap/sources/hal.py +17 -8
- gismap/sources/models.py +7 -0
- gismap/sources/multi.py +27 -17
- gismap/utils/requests.py +4 -2
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/METADATA +21 -5
- gismap-0.3.0.dist-info/RECORD +38 -0
- gismap/lab/vis.py +0 -329
- gismap-0.2.1.dist-info/RECORD +0 -29
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/WHEEL +0 -0
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/licenses/AUTHORS.md +0 -0
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
from string import Template
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# language=css
|
|
5
|
+
default_style = Template("""
|
|
6
|
+
|
|
7
|
+
#box-$uid {
|
|
8
|
+
position: relative;
|
|
9
|
+
width: 100% !important;
|
|
10
|
+
height: 80vh !important;
|
|
11
|
+
max-width: 100vw !important;
|
|
12
|
+
max-height: 80vh !important;
|
|
13
|
+
min-height: 80vh;
|
|
14
|
+
color: #111;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
#vis-$uid {
|
|
18
|
+
height: 100%; /* Make the inner div fill the parent */
|
|
19
|
+
width: 100%; /* Make the inner div fill the parent */
|
|
20
|
+
box-sizing: border-box;
|
|
21
|
+
border: 1px solid #444;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
html[data-theme="dark"] #vis-$uid {
|
|
25
|
+
background-color: var(--pst-color-background, #14181e);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
.modal {
|
|
29
|
+
display: none;
|
|
30
|
+
position: fixed;
|
|
31
|
+
z-index: 1000;
|
|
32
|
+
left: 0;
|
|
33
|
+
top: 0;
|
|
34
|
+
width: 100%;
|
|
35
|
+
height: 100%;
|
|
36
|
+
overflow: auto;
|
|
37
|
+
background-color: rgba(10, 10, 10, 0.85);
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
.modal-content {
|
|
41
|
+
background-color: #f4f4f7;
|
|
42
|
+
color: #222235;
|
|
43
|
+
margin: 10% auto;
|
|
44
|
+
padding: 24px;
|
|
45
|
+
border: 1px solid #888;
|
|
46
|
+
width: 50%;
|
|
47
|
+
border-radius: 8px;
|
|
48
|
+
box-shadow: 0 5px 15px rgba(0, 0, 0, .6);
|
|
49
|
+
}
|
|
50
|
+
.modal a {color: #2958d7;}
|
|
51
|
+
.modal a:visited {color: #8435a8;}
|
|
52
|
+
|
|
53
|
+
.close {
|
|
54
|
+
color: #777;
|
|
55
|
+
float: right;
|
|
56
|
+
font-size: 28px;
|
|
57
|
+
font-weight: bold;
|
|
58
|
+
cursor: pointer;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
.close:hover, .close:focus {
|
|
62
|
+
color: #aaa;
|
|
63
|
+
text-decoration: none;
|
|
64
|
+
cursor: pointer;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
.watermark {
|
|
68
|
+
position: absolute;
|
|
69
|
+
text-decoration: none;
|
|
70
|
+
color: #888;
|
|
71
|
+
font-size: min(2vw, 10px);
|
|
72
|
+
z-index: 10;
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
.gislink {
|
|
76
|
+
left: 10px;
|
|
77
|
+
bottom: 10px;
|
|
78
|
+
pointer-events: auto;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
.button {
|
|
82
|
+
background: none;
|
|
83
|
+
border: none;
|
|
84
|
+
padding: 0;
|
|
85
|
+
margin: 0;
|
|
86
|
+
cursor: pointer;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
.redraw {
|
|
90
|
+
left: 10px;
|
|
91
|
+
top: 10px;
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
.fullscreen {
|
|
95
|
+
bottom: 10px;
|
|
96
|
+
right: 10px;
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
.legend {
|
|
100
|
+
display: inline-block;
|
|
101
|
+
padding: 10px 16px;
|
|
102
|
+
border-radius: 8px;
|
|
103
|
+
box-shadow: 0 2px 8px 0 rgba(0, 0, 0, 0.10);
|
|
104
|
+
border: 1px solid var(--legend-border, #bbb);
|
|
105
|
+
background: var(--jp-layout-color1, #f5f5fa);
|
|
106
|
+
background-color: var(--legend-bg, rgba(240, 240, 245, 0.95));
|
|
107
|
+
position: absolute;
|
|
108
|
+
top: 12px;
|
|
109
|
+
right: 12px;
|
|
110
|
+
z-index: 20;
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
.legend-entry {
|
|
114
|
+
display: flex;
|
|
115
|
+
margin-right: 10px;
|
|
116
|
+
align-items: center;
|
|
117
|
+
cursor: pointer;
|
|
118
|
+
}
|
|
119
|
+
""")
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import unicodedata
|
|
3
|
+
import base64
|
|
4
|
+
from IPython.display import display, HTML
|
|
5
|
+
import ipywidgets as widgets
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
|
|
8
|
+
from gismap.lab.egomap import EgoMap
|
|
9
|
+
from gismap.lab.labmap import ListMap
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@contextmanager
def dummy_context():
    """
    No-op context manager.

    Used as a stand-in when a block has to run inside a ``with`` statement but
    no real context is wanted.

    Yields
    ------
    None
    """
    yield None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def safe_filename(name):
    """
    Turn an arbitrary string into a GisMap HTML filename.

    Accents are stripped through NFKD decomposition, spaces become
    underscores, every character that is not an ASCII letter, digit or
    underscore is dropped, and the result is cut to 60 characters.

    Parameters
    ----------
    name: :class:`str`
        Pretty much anything.

    Returns
    -------
    :class:`str`
        GisMap filename, of the form ``gismap-<stem>.html``.
    """
    # Decompose accented characters so the ASCII encoding keeps the base letter.
    decomposed = unicodedata.normalize("NFKD", name)
    plain = decomposed.encode("ascii", "ignore").decode("ascii")
    underscored = plain.replace(" ", "_")
    stem = re.sub(r"[^a-zA-Z0-9_]", "", underscored)[:60]
    return f"gismap-{stem}.html"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand, dblp: 38/11269), Rim Kaddah, Leonardo Linguaglossa, Céline Comte"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class GismapWidget:
    """
    A simple widget to test the production of LabMaps and EgoMaps.

    The widget is displayed as soon as it is created. It exposes a text area
    for the name(s), a database selector, a size slider and a button that
    triggers :meth:`compute_function`.

    Examples
    --------

    This is a doctest example. Use a notebook to play with the widget.

    >>> gw = GismapWidget()  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    VBox(children=(HTML(value=''), Output(), HBox(children=(Textarea(value='', ...
    >>> gw.names.value = "Fabien Mathieu"
    >>> gw.dbs.value = "HAL"
    >>> gw.size.value = 3
    >>> gw.compute_function(gw.compute, show=False)
    >>> gw.save_link.value[:30]
    "<a href='data:text/html;base64"
    >>> gw.names.value = "Diego Perino, Laurent Viennot"
    >>> gw.compute_function(gw.compute, show=False)
    >>> gw.save_link.value[:30]
    "<a href='data:text/html;base64"
    """

    def __init__(self):
        """Create the controls, wire the button, and display the widget."""
        # Input controls.
        names_layout = widgets.Layout(width="50%", height="100px")
        self.names = widgets.Textarea(
            placeholder=place_holder,
            description="Name(s):",
            layout=names_layout,
        )
        dbs_layout = widgets.Layout(width="80px", max_width="20%")
        self.dbs = widgets.RadioButtons(
            options=["HAL", "DBLP", "Both"],
            description="DB(s):",
            layout=dbs_layout,
        )
        slider_layout = widgets.Layout(width="250px")
        self.size = widgets.IntSlider(
            value=10,
            min=1,
            max=150,
            step=1,
            description="Size",
            layout=slider_layout,
        )
        button_layout = widgets.Layout(width="120px", max_width="140px")
        self.compute = widgets.Button(description="Map!", layout=button_layout)

        # Middle column: slider on top of the button.
        column_layout = widgets.Layout(
            align_items="center", max_width="27%", overflow="hidden"
        )
        self._col = widgets.VBox([self.size, self.compute], layout=column_layout)

        # Output areas: download link and rendered map.
        self.save_link = widgets.HTML(value="")
        self.compute.on_click(self.compute_function)
        self.out = widgets.Output()

        controls = widgets.HBox([self.names, self._col, self.dbs])
        self.widget = widgets.VBox([self.save_link, self.out, controls])
        display(self.widget)
        self.show = True

    def html(self):
        """
        Build the map described by the current control values.

        Several comma-separated names produce a ``ListMap``; a single name
        produces an ``EgoMap``. Commas located inside parentheses do not split
        names. The download link is reset before the map is built.

        Returns
        -------
        The value returned by the ``html()`` method of the built map.
        """
        choice = self.dbs.value
        dbs = {"HAL": "hal", "DBLP": "dblp"}.get(choice, ["hal", "dblp"])

        raw = self.names.value
        # Split on commas that are not enclosed in parentheses.
        pattern = r",\s*(?![^()]*\))"
        names = [chunk.strip() for chunk in re.split(pattern, raw)]

        self.save_link.value = ""
        # Building happens inside the output area only when the map is shown.
        ctx = self.out if self.show else dummy_context()

        several = len(names) > 1
        if several:
            lab = ListMap(names, dbs=dbs, name="planet")
        else:
            lab = EgoMap(names[0], dbs=dbs)

        if self.show:
            self.out.clear_output()

        with ctx:
            if several:
                lab.update_authors()
                lab.update_publis()
                # Only expand when the requested size exceeds the listed authors.
                extra = self.size.value - len(lab.authors)
                if extra > 0:
                    lab.expand(target=extra)
            else:
                lab.build(target=self.size.value)
        return lab.html()

    def compute_function(self, b, show=True):
        """
        Compute the map, publish a download link and optionally display it.

        Parameters
        ----------
        b
            Button that triggered the call (unused).
        show: :class:`bool`, default=True
            If True, the map is rendered in the output area.
        """
        self.show = show
        full = self.html()

        # Embed the whole page in a base64 data URI so it can be downloaded.
        page = f"<html><body>{full}</body></html>"
        b64 = base64.b64encode(page.encode("utf8")).decode("utf8")
        payload = f"data:text/html;base64,{b64}"
        savename = safe_filename(self.names.value)
        self.save_link.value = (
            f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
        )

        if not show:
            return
        self.out.clear_output()
        with self.out:
            display(HTML(full))
|
gismap/gismo.py
CHANGED
|
@@ -11,7 +11,7 @@ def make_post_publi(lab):
|
|
|
11
11
|
|
|
12
12
|
Parameters
|
|
13
13
|
----------
|
|
14
|
-
lab: :class:`~gismap.lab.
|
|
14
|
+
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
15
15
|
Lab that contains the corpus publications.
|
|
16
16
|
|
|
17
17
|
Returns
|
|
@@ -32,7 +32,7 @@ def make_gismo(lab, vectorizer_parameters=None):
|
|
|
32
32
|
|
|
33
33
|
Parameters
|
|
34
34
|
----------
|
|
35
|
-
lab: :class:`~gismap.lab.
|
|
35
|
+
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
36
36
|
Lab that contains publications.
|
|
37
37
|
vectorizer_parameters: :class:`dict`
|
|
38
38
|
Overriding parameters for the Countvectorizer of the gismo.
|
gismap/lab/__init__.py
CHANGED
|
@@ -1,10 +1,6 @@
|
|
|
1
|
-
from gismap.lab.
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
from gismap.lab.labmap import (
|
|
2
|
+
LabMap as Map,
|
|
3
|
+
ListMap as ListMap,
|
|
4
4
|
)
|
|
5
|
-
from gismap.lab.lip6 import Lip6 as Lip6, Lip6Lab as Lip6Lab
|
|
6
|
-
from gismap.lab.toulouse import Solace as Solace, LaasLab as LaasLab
|
|
7
|
-
from gismap.lab.graph import lab2graph as lab2graph
|
|
8
|
-
from gismap.lab.vis import generate_html as generate_html
|
|
9
5
|
from gismap.lab.egomap import EgoMap as EgoMap
|
|
10
6
|
from gismap.lab.lab_author import LabAuthor as LabAuthor
|
gismap/lab/egomap.py
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
from gismap.lab.
|
|
1
|
+
from gismap.lab.labmap import LabMap
|
|
2
2
|
from gismap.lab.lab_author import LabAuthor
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class EgoMap(
|
|
5
|
+
class EgoMap(LabMap):
|
|
6
6
|
"""
|
|
7
7
|
Parameters
|
|
8
8
|
----------
|
|
9
|
-
|
|
9
|
+
star
|
|
10
10
|
args
|
|
11
11
|
kwargs
|
|
12
12
|
|
|
@@ -15,28 +15,27 @@ class EgoMap(Lab):
|
|
|
15
15
|
|
|
16
16
|
>>> dang = EgoMap("The-Dang Huynh", dbs="hal")
|
|
17
17
|
>>> dang.build(target=10)
|
|
18
|
-
>>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE
|
|
19
|
-
['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu'
|
|
20
|
-
'Siu-Wai Ho', 'Sébastien Tixeuil', 'The-Dang Huynh', 'Yannick Carlinet']
|
|
18
|
+
>>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
|
|
19
|
+
['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu',...
|
|
21
20
|
"""
|
|
22
21
|
|
|
23
|
-
def __init__(self,
|
|
24
|
-
if isinstance(
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
22
|
+
def __init__(self, star, *args, **kwargs):
    """
    Parameters
    ----------
    star: :class:`str` or author object
        Central author. A string is wrapped into a ``LabAuthor``.
    args
        Positional arguments forwarded to the parent constructor.
    kwargs
        Keyword arguments forwarded to the parent constructor.
    """
    center = LabAuthor(star) if isinstance(star, str) else star
    # The star sits at the origin and gets its own group.
    center.metadata.position = (0, 0)
    center.metadata.group = "star"
    self.star = center
    super().__init__(*args, **kwargs)
|
|
29
29
|
|
|
30
30
|
def _author_iterator(self):
    """Yield the single initial author of the map: the star."""
    center = self.star
    yield center
|
|
32
32
|
|
|
33
33
|
def build(self, **kwargs):
    """
    Build the map around the star.

    The star's metadata and publications are fetched first, then the map is
    expanded with a "planet" group, and finally with a "moon" group if the
    target has not been reached.

    Parameters
    ----------
    kwargs
        Forwarded to ``expand``. The ``target`` entry (default 50) is popped
        and replaced by ``target`` minus the current number of authors before
        each expansion.
    """
    target = kwargs.pop("target", 50)

    self.update_authors(desc="Star metadata")
    self.update_publis(desc="Star publications")

    # First ring.
    kwargs["target"] = target - len(self.authors)
    self.expand(group="planet", desc="Planets", **kwargs)

    # Second ring, only if there is still room.
    remaining = target - len(self.authors)
    kwargs["target"] = remaining
    if remaining > 0:
        self.expand(group="moon", desc="Moons", **kwargs)
|
gismap/lab/expansion.py
CHANGED
|
@@ -53,7 +53,7 @@ def count_prospect_entries(lab):
|
|
|
53
53
|
|
|
54
54
|
Parameters
|
|
55
55
|
----------
|
|
56
|
-
lab: :class:`~gismap.lab.
|
|
56
|
+
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
57
57
|
Reference lab.
|
|
58
58
|
|
|
59
59
|
Returns
|
|
@@ -73,8 +73,8 @@ def count_prospect_entries(lab):
|
|
|
73
73
|
count_publications.append(a.key)
|
|
74
74
|
else:
|
|
75
75
|
lab_authors.add(a.key)
|
|
76
|
-
for
|
|
77
|
-
count_coauthors[
|
|
76
|
+
for a in lab_authors:
|
|
77
|
+
count_coauthors[a].update(new_authors)
|
|
78
78
|
|
|
79
79
|
count_coauthors = Counter(
|
|
80
80
|
k for new_authors in count_coauthors.values() for k in new_authors
|
|
@@ -117,7 +117,7 @@ def get_prospects(lab):
|
|
|
117
117
|
"""
|
|
118
118
|
Parameters
|
|
119
119
|
----------
|
|
120
|
-
lab: :class:`~gismap.lab.
|
|
120
|
+
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
121
121
|
Reference lab.
|
|
122
122
|
|
|
123
123
|
Returns
|
|
@@ -150,7 +150,7 @@ def get_member_names(lab):
|
|
|
150
150
|
"""
|
|
151
151
|
Parameters
|
|
152
152
|
----------
|
|
153
|
-
lab: :class:`~gismap.lab.
|
|
153
|
+
lab: :class:`~gismap.lab.labmap.LabMap`
|
|
154
154
|
Reference lab.
|
|
155
155
|
|
|
156
156
|
Returns
|
|
@@ -222,12 +222,13 @@ def proper_prospects(
|
|
|
222
222
|
locs = [j for j in np.where(jc[i, :] > threshold)[0] if not done[j]]
|
|
223
223
|
done[locs] = True
|
|
224
224
|
sources = sort_author_sources([prospects[j].author for j in locs])
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
225
|
+
if sources:
|
|
226
|
+
strength = sum(prospects[j].score for j in locs)
|
|
227
|
+
new_author = LabAuthor.from_sources(sources)
|
|
228
|
+
new_lab.append((strength, new_author))
|
|
228
229
|
|
|
229
230
|
# Extract top prospects
|
|
230
|
-
new_lab = [
|
|
231
|
+
new_lab = [a[1] for a in sorted(new_lab, key=lambda a: a[0], reverse=True)][
|
|
231
232
|
:max_new
|
|
232
233
|
]
|
|
233
234
|
new_rosetta = {s.key: a for a in new_lab for s in a.sources}
|
gismap/lab/filters.py
CHANGED
|
@@ -1,18 +1,93 @@
|
|
|
1
|
-
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
# editorials = re.compile(r"ditorial|foreword", re.IGNORECASE)
|
|
4
|
+
# charlatans = re.compile(r"Raoult|Kofman|Buob")
|
|
5
|
+
|
|
6
|
+
editorials = ["ditorial", "Foreword", "Brief Announcement", "Preface", "Préface"]
|
|
7
|
+
charlatans = ["Raoult", "Kofman", "Buob"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def re_filter(words):
    """
    Build a text filter that rejects any text matching one of the given patterns.

    Parameters
    ----------
    words: :class:`list` or :class:`str`
        Pattern(s) to filter. A string is used as a single regular expression;
        a list is joined into an alternation. Entries are regular expressions,
        they are not escaped. Empty entries are ignored.

    Returns
    -------
    callable
        Filter function: returns True if the text contains none of the
        patterns, False otherwise. With no (non-empty) pattern, every text is
        accepted.
    """
    candidates = [words] if isinstance(words, str) else list(words)
    # An empty pattern matches any text and would make the filter reject
    # everything, so empty entries are dropped.
    patterns = [p for p in candidates if p]
    if not patterns:
        return lambda txt: True
    taboo = re.compile("|".join(patterns))
    return lambda txt: taboo.search(txt) is None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def publication_size_filter(n_max=9):
    """
    Build a filter that keeps publications with a limited number of authors.

    Parameters
    ----------
    n_max: int, default=9
        Maximum number of co-authors allowed.

    Returns
    -------
    callable
        Filter on number of co-authors: True if the publication has at most
        `n_max` authors.
    """

    def few_enough_authors(p):
        return not len(p.authors) > n_max

    return few_enough_authors
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def publication_oneword_filter(n_min=2):
    """
    Build a filter that keeps publications whose title is long enough.

    Parameters
    ----------
    n_min: int, default=2
        Minimum number of words required in the title.

    Returns
    -------
    callable
        Filter on number of words required in the title: True if the
        whitespace-separated title has at least `n_min` words.
    """

    def enough_words(p):
        word_count = len(p.title.split())
        return word_count >= n_min

    return enough_words
|
|
6
58
|
|
|
7
|
-
def publication_size_filter(n_max=10):
|
|
8
|
-
return lambda p: len(p.authors) < n_max
|
|
9
59
|
|
|
10
60
|
def publication_taboo_filter(w=None):
    """
    Build a filter that rejects publications whose title matches a taboo pattern.

    Parameters
    ----------
    w: :class:`list`, optional
        List of words to filter. Defaults to the module-level ``editorials``.

    Returns
    -------
    Callable
        Filter function on publications (applied to the ``title`` attribute).
    """
    title_is_clean = re_filter(editorials if w is None else w)
    return lambda p: title_is_clean(p.title)
|
|
76
|
+
|
|
14
77
|
|
|
15
78
|
def author_taboo_filter(w=None):
    """
    Build a filter that rejects authors whose name matches a taboo pattern.

    Parameters
    ----------
    w: :class:`list`, optional
        List of words to filter. Defaults to the module-level ``charlatans``.

    Returns
    -------
    Callable
        Filter function on authors (applied to the ``name`` attribute).
    """
    name_is_clean = re_filter(charlatans if w is None else w)
    return lambda p: name_is_clean(p.name)
|
gismap/lab/lab_author.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
+
import re
|
|
2
3
|
|
|
3
4
|
from gismap import get_classes, HAL, DBLP
|
|
4
|
-
from gismap.sources.models import DB
|
|
5
|
+
from gismap.sources.models import DB, db_class_to_auth_class
|
|
5
6
|
from gismap.sources.multi import SourcedAuthor, sort_author_sources
|
|
6
7
|
from gismap.utils.common import LazyRepr, list_of_objects
|
|
7
8
|
from gismap.utils.logger import logger
|
|
@@ -26,6 +27,8 @@ class AuthorMetadata(LazyRepr):
|
|
|
26
27
|
Group of the author.
|
|
27
28
|
position: :class:`tuple`
|
|
28
29
|
Coordinates of the author.
|
|
30
|
+
keys: :class:`dict`
|
|
31
|
+
Some DB key values of the author.
|
|
29
32
|
"""
|
|
30
33
|
|
|
31
34
|
url: str = None
|
|
@@ -36,6 +39,26 @@ class AuthorMetadata(LazyRepr):
|
|
|
36
39
|
|
|
37
40
|
@dataclass(repr=False)
|
|
38
41
|
class LabAuthor(SourcedAuthor):
|
|
42
|
+
"""
|
|
43
|
+
Examples
|
|
44
|
+
---------
|
|
45
|
+
The metadata and DB key(s) of an author can be entered in parentheses using key/values.
|
|
46
|
+
|
|
47
|
+
Improper key/values are ignored (with a warning).
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
>>> dummy= LabAuthor("My Name(img: https://my.url.img, group:me,url:https://mysite.org,hal:key1,dblp:toto,badkey:hello,no_colon_separator)")
|
|
51
|
+
>>> dummy.metadata
|
|
52
|
+
AuthorMetadata(url='https://mysite.org', img='https://my.url.img', group='me')
|
|
53
|
+
>>> dummy.sources
|
|
54
|
+
[HALAuthor(name='My Name', key='key1'), DBLPAuthor(name='My Name', key='toto')]
|
|
55
|
+
|
|
56
|
+
You can enter multiple keys for the same DB. HAL key types are automatically detected.
|
|
57
|
+
|
|
58
|
+
>>> dummy2= LabAuthor("My Name (hal:key1,hal:123456,hal: My Other Name )")
|
|
59
|
+
>>> dummy2.sources
|
|
60
|
+
[HALAuthor(name='My Name', key='key1'), HALAuthor(name='My Name', key='123456', key_type='pid'), HALAuthor(name='My Name', key='My Other Name', key_type='fullname')]
|
|
61
|
+
"""
|
|
39
62
|
metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
|
|
40
63
|
|
|
41
64
|
def auto_img(self):
|
|
@@ -45,6 +68,30 @@ class LabAuthor(SourcedAuthor):
|
|
|
45
68
|
self.metadata.img = img
|
|
46
69
|
break
|
|
47
70
|
|
|
71
|
+
def __post_init__(self):
    """
    Parse the raw name given at construction.

    The name may be followed by a parenthesised, comma-separated list of
    ``key: value`` pairs. Pairs whose key is a known DB append a source built
    with that key; ``url``, ``img`` and ``group`` pairs set the matching
    metadata attribute. Anything else is reported with a warning and ignored.
    The name itself is stripped of surrounding whitespace.
    """
    pattern = r"\s*([^,(]+)\s*(?:\(([^)]*)\))?\s*$"
    parsed = re.match(pattern, self.name)
    if not parsed:
        self.name = self.name.strip()
        return

    self.name = parsed.group(1).strip()
    content = parsed.group(2)
    if not content:
        return

    for kv in content.split(","):
        raw_key, colon, raw_value = kv.partition(":")
        k = raw_key.strip().lower()
        v = raw_value.strip()
        if colon and k in db_dict:
            # DB key: register a source for that database.
            DBAuthor = db_class_to_auth_class(db_dict[k])
            self.sources.append(DBAuthor(name=self.name, key=v))
        elif colon and k in ("url", "img", "group"):
            setattr(self.metadata, k, v)
        else:
            # Covers both a missing colon and an unknown key.
            logger.warning(f"I don't know what to do with {kv}.")
|
|
94
|
+
|
|
48
95
|
def auto_sources(self, dbs=None):
|
|
49
96
|
"""
|
|
50
97
|
Automatically populate the sources based on author's name.
|
|
@@ -63,9 +110,9 @@ class LabAuthor(SourcedAuthor):
|
|
|
63
110
|
for db in dbs:
|
|
64
111
|
source = db.search_author(self.name)
|
|
65
112
|
if len(source) == 0:
|
|
66
|
-
logger.
|
|
113
|
+
logger.info(f"{self.name} not found in {db.db_name}")
|
|
67
114
|
elif len(source) > 1:
|
|
68
|
-
logger.
|
|
115
|
+
logger.info(f"Multiple entries for {self.name} in {db.db_name}")
|
|
69
116
|
sources += source
|
|
70
117
|
if len(sources) > 0:
|
|
71
118
|
self.sources = sort_author_sources(sources)
|
|
@@ -74,7 +121,7 @@ class LabAuthor(SourcedAuthor):
|
|
|
74
121
|
def labify_author(author, rosetta):
    """
    Map an author to its lab counterpart when one is known.

    Parameters
    ----------
    author
        Author to convert; returned unchanged if already a ``LabAuthor``.
    rosetta: :class:`dict`
        Lookup table, queried by the author's key first, then by name.

    Returns
    -------
    The entry of `rosetta` found by key, else the one found by name, else
    `author` itself.
    """
    if isinstance(author, LabAuthor):
        return author
    key = author.key
    by_name = rosetta.get(author.name, author)
    return rosetta.get(key, by_name)
|
|
78
125
|
|
|
79
126
|
|
|
80
127
|
def labify_publications(pubs, rosetta):
|