gismap 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/__init__.py +2 -0
- gismap/lab/__init__.py +2 -2
- gismap/lab/egomap.py +42 -0
- gismap/lab/expansion.py +240 -0
- gismap/lab/filters.py +18 -0
- gismap/lab/graph.py +11 -8
- gismap/lab/lab.py +86 -70
- gismap/lab/lab_author.py +84 -0
- gismap/lab/lip6.py +2 -1
- gismap/lab/toulouse.py +2 -1
- gismap/lab/vis.py +183 -25
- gismap/sources/dblp.py +26 -22
- gismap/sources/hal.py +47 -11
- gismap/sources/multi.py +68 -31
- gismap/utils/common.py +47 -2
- gismap/utils/requests.py +24 -11
- gismap/utils/text.py +66 -1
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/METADATA +6 -4
- gismap-0.2.1.dist-info/RECORD +29 -0
- gismap-0.1.0.dist-info/RECORD +0 -25
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/WHEEL +0 -0
- {gismap-0.1.0.dist-info → gismap-0.2.1.dist-info}/licenses/AUTHORS.md +0 -0
gismap/lab/vis.py
CHANGED
|
@@ -4,48 +4,154 @@ import json
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
vis_template = Template("""
|
|
7
|
+
<div class="gismap-content">
|
|
7
8
|
<div id="${container_id}"></div>
|
|
9
|
+
<a
|
|
10
|
+
href="https://balouf.github.io/gismap/"
|
|
11
|
+
target="_blank"
|
|
12
|
+
id="gismap-brand"
|
|
13
|
+
style="position: absolute; left: 10px; bottom: 10px; text-decoration: none; color: #888; font-size: min(2vw, 10px);
|
|
14
|
+
z-index: 10; pointer-events: auto;"
|
|
15
|
+
>
|
|
16
|
+
© Gismap 2025
|
|
17
|
+
</a>
|
|
8
18
|
<div id="${modal_id}" class="modal">
|
|
9
|
-
|
|
19
|
+
<div class="modal-content">
|
|
10
20
|
<span class="close" id="${modal_close_id}">×</span>
|
|
11
21
|
<div id="${modal_body_id}"></div>
|
|
12
22
|
</div>
|
|
13
23
|
</div>
|
|
24
|
+
</div>
|
|
14
25
|
<style>
|
|
26
|
+
.gismap-content {
|
|
27
|
+
position: relative;
|
|
28
|
+
width: 100%;
|
|
29
|
+
height: 80vh !important;
|
|
30
|
+
max-width: 100vw;
|
|
31
|
+
max-height: 100vh !important;
|
|
32
|
+
}
|
|
15
33
|
/* Styles adaptatifs pour dark/light */
|
|
34
|
+
/* Default: dark mode styles */
|
|
35
|
+
#${container_id} {
|
|
36
|
+
width: 100%;
|
|
37
|
+
height: 100%;
|
|
38
|
+
box-sizing: border-box;
|
|
39
|
+
border: 1px solid #444;
|
|
40
|
+
background: #181818;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
.modal {
|
|
44
|
+
display: none;
|
|
45
|
+
position: fixed;
|
|
46
|
+
z-index: 1000;
|
|
47
|
+
left: 0; top: 0;
|
|
48
|
+
width: 100%; height: 100%;
|
|
49
|
+
overflow: auto;
|
|
50
|
+
background-color: rgba(10,10,10,0.85);
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
.modal-content {
|
|
54
|
+
background-color: #23272e;
|
|
55
|
+
color: #f0f0f0;
|
|
56
|
+
margin: 10% auto;
|
|
57
|
+
padding: 24px;
|
|
58
|
+
border: 1px solid #888;
|
|
59
|
+
width: 50%;
|
|
60
|
+
border-radius: 8px;
|
|
61
|
+
box-shadow: 0 5px 15px rgba(0,0,0,.6);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
.close {
|
|
65
|
+
color: #aaa;
|
|
66
|
+
float: right;
|
|
67
|
+
font-size: 28px;
|
|
68
|
+
font-weight: bold;
|
|
69
|
+
cursor: pointer;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
.close:hover, .close:focus {
|
|
73
|
+
color: #fff;
|
|
74
|
+
text-decoration: none;
|
|
75
|
+
cursor: pointer;
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
/* PyData Sphinx Light Theme */
|
|
79
|
+
html[data-theme="light"] #${container_id},
|
|
80
|
+
body[data-jp-theme-light="true"] #${container_id} {
|
|
81
|
+
background: #fff;
|
|
82
|
+
border: 1px solid #ccc;
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
html[data-theme="light"] .modal,
|
|
86
|
+
body[data-jp-theme-light="true"] .modal {
|
|
87
|
+
background-color: rgba(220,220,220,0.85);
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
html[data-theme="light"] .modal-content,
|
|
91
|
+
body[data-jp-theme-light="true"] .modal-content {
|
|
92
|
+
background: #fff;
|
|
93
|
+
color: #222;
|
|
94
|
+
border: 1px solid #888;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
html[data-theme="light"] .close,
|
|
98
|
+
body[data-jp-theme-light="true"] .close {
|
|
99
|
+
color: #222;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
html[data-theme="light"] .close:hover, html[data-theme="light"] .close:focus,
|
|
103
|
+
body[data-jp-theme-light="true"] .close:hover, body[data-jp-theme-light="true"] .close:focus {
|
|
104
|
+
color: #555;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
/* Fallback: system light mode */
|
|
108
|
+
@media (prefers-color-scheme: light) {
|
|
16
109
|
#${container_id} {
|
|
17
|
-
|
|
18
|
-
|
|
110
|
+
background: #fff;
|
|
111
|
+
border: 1px solid #ccc;
|
|
19
112
|
}
|
|
20
113
|
.modal {
|
|
21
|
-
|
|
22
|
-
overflow: auto; background-color: rgba(10,10,10,0.85);
|
|
114
|
+
background-color: rgba(220,220,220,0.85);
|
|
23
115
|
}
|
|
24
116
|
.modal-content {
|
|
25
|
-
background
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
box-shadow: 0 5px 15px rgba(0,0,0,.6);
|
|
117
|
+
background: #fff;
|
|
118
|
+
color: #222;
|
|
119
|
+
border: 1px solid #888;
|
|
29
120
|
}
|
|
30
121
|
.close {
|
|
31
|
-
color: #
|
|
122
|
+
color: #222;
|
|
32
123
|
}
|
|
33
|
-
.close:hover, .close:focus {
|
|
34
|
-
|
|
35
|
-
#${container_id} { background: #fff; border: 1px solid #ccc; }
|
|
36
|
-
.modal { background-color: rgba(220,220,220,0.85); }
|
|
37
|
-
.modal-content { background: #fff; color: #222; border: 1px solid #888; }
|
|
38
|
-
.close { color: #222; }
|
|
39
|
-
.close:hover, .close:focus { color: #555; }
|
|
124
|
+
.close:hover, .close:focus {
|
|
125
|
+
color: #555;
|
|
40
126
|
}
|
|
127
|
+
}
|
|
41
128
|
</style>
|
|
42
|
-
<script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
|
|
43
129
|
<script type="text/javascript">
|
|
44
130
|
(function() {
|
|
45
131
|
// Détection du thème
|
|
46
|
-
|
|
47
|
-
|
|
132
|
+
function getTheme() {
|
|
133
|
+
// Try PyData Sphinx theme on <html>
|
|
134
|
+
const pydataTheme = document.documentElement.getAttribute("data-theme");
|
|
135
|
+
if (pydataTheme === "dark" || pydataTheme === "light") {
|
|
136
|
+
return pydataTheme;
|
|
48
137
|
}
|
|
138
|
+
|
|
139
|
+
// Try JupyterLab theme on <body>
|
|
140
|
+
const jupyterLabTheme = document.body.getAttribute("data-jp-theme-name");
|
|
141
|
+
if (jupyterLabTheme) {
|
|
142
|
+
// Simplify theme name to 'dark' or 'light'
|
|
143
|
+
const lowerName = jupyterLabTheme.toLowerCase();
|
|
144
|
+
if (lowerName.includes("dark")) {
|
|
145
|
+
return "dark";
|
|
146
|
+
}
|
|
147
|
+
if (lowerName.includes("light")) {
|
|
148
|
+
return "light";
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
// Fallback to system preference
|
|
153
|
+
return window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches ? "dark" : "light";
|
|
154
|
+
};
|
|
49
155
|
function getVisOptions(theme) {
|
|
50
156
|
if (theme === 'dark') {
|
|
51
157
|
return {
|
|
@@ -86,15 +192,41 @@ vis_template = Template("""
|
|
|
86
192
|
interaction: { hover: true }
|
|
87
193
|
};
|
|
88
194
|
}
|
|
195
|
+
};
|
|
196
|
+
|
|
197
|
+
var physics = {
|
|
198
|
+
physics: {
|
|
199
|
+
solver: "forceAtlas2Based",
|
|
200
|
+
forceAtlas2Based: {
|
|
201
|
+
gravitationalConstant: -50,
|
|
202
|
+
centralGravity: 0.01,
|
|
203
|
+
springLength: 200,
|
|
204
|
+
springConstant: 0.08,
|
|
205
|
+
damping: 0.98,
|
|
206
|
+
avoidOverlap: 1
|
|
207
|
+
},
|
|
208
|
+
maxVelocity: 10,
|
|
209
|
+
minVelocity: 0.9,
|
|
210
|
+
stabilization: {
|
|
211
|
+
enabled: true,
|
|
212
|
+
iterations: 2000,
|
|
213
|
+
updateInterval: 50,
|
|
214
|
+
onlyDynamicEdges: false,
|
|
215
|
+
fit: true
|
|
216
|
+
},
|
|
217
|
+
timestep: 0.25
|
|
89
218
|
}
|
|
219
|
+
};
|
|
220
|
+
|
|
221
|
+
function renderNetwork() {
|
|
90
222
|
const nodes = new vis.DataSet(${nodes_json});
|
|
91
223
|
const edges = new vis.DataSet(${edges_json});
|
|
92
224
|
const container = document.getElementById('${container_id}');
|
|
93
225
|
let network = null;
|
|
94
|
-
function renderNetwork() {
|
|
95
226
|
const theme = getTheme();
|
|
96
227
|
const options = getVisOptions(theme);
|
|
97
228
|
network = new vis.Network(container, { nodes: nodes, edges: edges }, options);
|
|
229
|
+
network.setOptions(physics)
|
|
98
230
|
// Tooltip survol
|
|
99
231
|
network.on("hoverNode", function(params) {
|
|
100
232
|
const node = nodes.get(params.node);
|
|
@@ -131,12 +263,38 @@ vis_template = Template("""
|
|
|
131
263
|
window.onclick = function(event) {
|
|
132
264
|
if (event.target == modal) { modal.style.display = "none"; }
|
|
133
265
|
};
|
|
266
|
+
};
|
|
267
|
+
|
|
268
|
+
function loadVisAndRender() {
|
|
269
|
+
if (typeof vis === 'undefined') {
|
|
270
|
+
var script = document.createElement('script');
|
|
271
|
+
script.src = "https://unpkg.com/vis-network/standalone/umd/vis-network.min.js";
|
|
272
|
+
script.type = "text/javascript";
|
|
273
|
+
script.onload = function() {
|
|
274
|
+
console.log("vis-network loaded dynamically");
|
|
275
|
+
renderNetwork(); // Graph init after vis is loaded
|
|
276
|
+
};
|
|
277
|
+
document.head.appendChild(script);
|
|
278
|
+
} else {
|
|
279
|
+
console.log("vis-network already loaded");
|
|
280
|
+
renderNetwork(); // Graph init immediately
|
|
134
281
|
}
|
|
135
|
-
|
|
282
|
+
}
|
|
283
|
+
loadVisAndRender();
|
|
284
|
+
|
|
136
285
|
// Adapter dynamiquement si le thème change
|
|
137
|
-
window.
|
|
138
|
-
|
|
139
|
-
|
|
286
|
+
window.addEventListener("theme-changed", () => loadVisAndRender());
|
|
287
|
+
const observer = new MutationObserver(mutations => {
|
|
288
|
+
for (const mutation of mutations) {
|
|
289
|
+
if (mutation.type === "attributes" && mutation.attributeName === "data-jp-theme-name") {
|
|
290
|
+
loadVisAndRender();
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
});
|
|
294
|
+
observer.observe(document.body, { attributes: true });
|
|
295
|
+
if (window.matchMedia) {
|
|
296
|
+
window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => loadVisAndRender());
|
|
297
|
+
};
|
|
140
298
|
})();
|
|
141
299
|
</script>
|
|
142
300
|
""")
|
gismap/sources/dblp.py
CHANGED
|
@@ -5,15 +5,15 @@ from bs4 import BeautifulSoup as Soup
|
|
|
5
5
|
from time import sleep
|
|
6
6
|
|
|
7
7
|
from gismap.sources.models import DB, Author, Publication
|
|
8
|
-
from gismap.utils.text import clean_aliases
|
|
8
|
+
from gismap.utils.text import clean_aliases, auto_int
|
|
9
9
|
from gismap.utils.requests import get
|
|
10
10
|
|
|
11
11
|
|
|
12
12
|
@dataclass(repr=False)
|
|
13
13
|
class DBLP(DB):
|
|
14
14
|
db_name: ClassVar[str] = "dblp"
|
|
15
|
-
author_backoff: ClassVar[float] =
|
|
16
|
-
publi_backoff: ClassVar[float] =
|
|
15
|
+
author_backoff: ClassVar[float] = 5.0
|
|
16
|
+
publi_backoff: ClassVar[float] = 1.0
|
|
17
17
|
|
|
18
18
|
@classmethod
|
|
19
19
|
def search_author(cls, name, wait=True):
|
|
@@ -84,17 +84,16 @@ class DBLP(DB):
|
|
|
84
84
|
authors=[DBLPAuthor(name='Yacine Boufkhad', key='75/5742'), DBLPAuthor(name='Fabien Mathieu', key='66/2077'),
|
|
85
85
|
DBLPAuthor(name='Fabien de Montgolfier', key='57/6313'), DBLPAuthor(name='Diego Perino', key='03/3645'),
|
|
86
86
|
DBLPAuthor(name='Laurent Viennot', key='v/LaurentViennot')],
|
|
87
|
-
venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08'
|
|
88
|
-
url='https://dblp.org/rec/conf/iptps/BoufkhadMMPV08.html', pages=4)
|
|
87
|
+
venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08')
|
|
89
88
|
>>> publications[-1] # doctest: +NORMALIZE_WHITESPACE
|
|
90
89
|
DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
|
|
91
90
|
authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
|
|
92
|
-
key='conf/sss/Mathieu07'
|
|
91
|
+
key='conf/sss/Mathieu07')
|
|
93
92
|
"""
|
|
94
93
|
r = get(f"https://dblp.org/pid/{a.key}.xml")
|
|
95
94
|
soup = Soup(r, features="xml")
|
|
96
95
|
if wait:
|
|
97
|
-
sleep(cls.
|
|
96
|
+
sleep(cls.publi_backoff)
|
|
98
97
|
res = [DBLPPublication.from_soup(r) for r in soup("r")]
|
|
99
98
|
return [p for p in res if p.authors]
|
|
100
99
|
|
|
@@ -110,7 +109,7 @@ class DBLPAuthor(Author, DBLP):
|
|
|
110
109
|
return f"https://dblp.org/pid/{self.key}.html"
|
|
111
110
|
return f"https://dblp.org/search?q={quote_plus(self.name)}"
|
|
112
111
|
|
|
113
|
-
def get_publications(self, wait=
|
|
112
|
+
def get_publications(self, wait=True):
    """
    Fetch the publications of this author from DBLP.

    Parameters
    ----------
    wait: :class:`bool`, default=True
        Forwarded as-is to :meth:`DBLP.from_author`.

    Returns
    -------
    :class:`list`
        Whatever :meth:`DBLP.from_author` returns for this author.
    """
    publications = DBLP.from_author(self, wait=wait)
    return publications
|
|
115
114
|
|
|
116
115
|
|
|
@@ -128,29 +127,27 @@ DBLP_TYPES = {
|
|
|
128
127
|
@dataclass(repr=False)
|
|
129
128
|
class DBLPPublication(Publication, DBLP):
|
|
130
129
|
key: str
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
130
|
+
metadata: dict = field(default_factory=dict)
|
|
131
|
+
|
|
132
|
+
@property
def url(self):
    """
    Address of the DBLP record page of the publication.

    Returns
    -------
    :class:`str` or None
        ``https://dblp.org/rec/<key>.html`` when ``key`` is truthy, ``None`` otherwise.
    """
    return f"https://dblp.org/rec/{self.key}.html" if self.key else None
|
|
135
138
|
|
|
136
139
|
@classmethod
|
|
137
140
|
def from_soup(cls, soup):
|
|
138
141
|
p = soup.find()
|
|
139
142
|
typ = p.get("publtype", p.name)
|
|
140
143
|
typ = DBLP_TYPES.get(typ, typ)
|
|
144
|
+
|
|
141
145
|
res = {
|
|
142
146
|
"type": typ,
|
|
143
147
|
"key": p["key"],
|
|
144
|
-
"
|
|
148
|
+
"title": p.title.text,
|
|
149
|
+
"year": int(p.year.text),
|
|
145
150
|
}
|
|
146
|
-
keys = ["title", "booktitle", "pages", "journal", "year", "volume", "number"]
|
|
147
|
-
for tag in keys:
|
|
148
|
-
t = p.find(tag)
|
|
149
|
-
if t:
|
|
150
|
-
try:
|
|
151
|
-
res[tag] = int(t.text)
|
|
152
|
-
except ValueError:
|
|
153
|
-
res[tag] = t.text
|
|
154
151
|
for tag in ["booktitle", "journal"]:
|
|
155
152
|
t = p.find(tag)
|
|
156
153
|
if t:
|
|
@@ -159,4 +156,11 @@ class DBLPPublication(Publication, DBLP):
|
|
|
159
156
|
else:
|
|
160
157
|
res["venue"] = "unpublished"
|
|
161
158
|
res["authors"] = [DBLPAuthor(key=a["pid"], name=a.text) for a in p("author")]
|
|
162
|
-
|
|
159
|
+
|
|
160
|
+
metadata = dict()
|
|
161
|
+
for tag in p.find_all(recursive=False):
|
|
162
|
+
name = tag.name
|
|
163
|
+
if name not in {"title", "year", "author", "booktitle", "journal"}:
|
|
164
|
+
metadata[name] = auto_int(tag.text)
|
|
165
|
+
|
|
166
|
+
return cls(**res, metadata=metadata)
|
gismap/sources/hal.py
CHANGED
|
@@ -2,6 +2,7 @@ from typing import ClassVar
|
|
|
2
2
|
from dataclasses import dataclass, field
|
|
3
3
|
from collections import defaultdict
|
|
4
4
|
from urllib.parse import quote_plus
|
|
5
|
+
from bs4 import BeautifulSoup as Soup
|
|
5
6
|
import json
|
|
6
7
|
|
|
7
8
|
from gismap.sources.models import DB, Publication, Author # DBAuthor, DBPublication
|
|
@@ -67,7 +68,7 @@ class HAL(DB):
|
|
|
67
68
|
elif "person_i" in a:
|
|
68
69
|
pids[a["person_i"]].add(a.get("label_s"))
|
|
69
70
|
elif "fullName_s" in a:
|
|
70
|
-
|
|
71
|
+
names.add(a["fullName_s"])
|
|
71
72
|
res = [
|
|
72
73
|
HALAuthor(name=name, key=k, aliases=clean_aliases(name, v))
|
|
73
74
|
for k, v in hids.items()
|
|
@@ -84,7 +85,7 @@ class HAL(DB):
|
|
|
84
85
|
HALAuthor(
|
|
85
86
|
name=name,
|
|
86
87
|
key=fullname,
|
|
87
|
-
aliases=
|
|
88
|
+
aliases=[],
|
|
88
89
|
key_type="fullname",
|
|
89
90
|
)
|
|
90
91
|
for fullname in names
|
|
@@ -117,7 +118,7 @@ class HAL(DB):
|
|
|
117
118
|
HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname'),
|
|
118
119
|
HALAuthor(name='Laurent Viennot', key='laurentviennot')],
|
|
119
120
|
venue='Proceedings of the 7th Internnational Workshop on Peer-to-Peer Systems (IPTPS)', type='conference',
|
|
120
|
-
year=2008, key='471724'
|
|
121
|
+
year=2008, key='471724')
|
|
121
122
|
>>> diego = publications[2].authors[3]
|
|
122
123
|
>>> diego
|
|
123
124
|
HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname')
|
|
@@ -127,7 +128,7 @@ class HAL(DB):
|
|
|
127
128
|
HALPublication(title='Upper bounds for stabilization in acyclic preference-based systems',
|
|
128
129
|
authors=[HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')],
|
|
129
130
|
venue="SSS'07 - 9th international conference on Stabilization, Safety, and Security of Distributed Systems",
|
|
130
|
-
type='conference', year=2007, key='668356'
|
|
131
|
+
type='conference', year=2007, key='668356')
|
|
131
132
|
|
|
132
133
|
Case of someone with multiple ids one want to cumulate:
|
|
133
134
|
|
|
@@ -180,9 +181,30 @@ class HALAuthor(Author, HAL):
|
|
|
180
181
|
key: str | int = None
|
|
181
182
|
key_type: str = None
|
|
182
183
|
aliases: list = field(default_factory=list)
|
|
184
|
+
_url: str = None
|
|
185
|
+
_img: str = None
|
|
186
|
+
_cv: bool = None
|
|
187
|
+
|
|
188
|
+
def check_cv(self):
    """
    Probe the HAL CV page of the author and cache the outcome on the instance.

    ``_cv`` records whether a CV page was found. When one is found, ``_url`` is set to
    the CV address and ``_img`` to the ``src`` of the picture, provided the page has an
    image at the expected location.

    Authors whose ``key_type`` is not ``None`` are never probed: ``_cv`` is set to
    ``False`` without issuing any request.

    Returns
    -------
    None
    """
    if self.key_type is not None:
        self._cv = False
        return None
    url = f"https://cv.hal.science/{self.key}"
    soup = Soup(get(url), "lxml")
    # A page that contains a form is treated as "no CV available".
    if soup.form:
        self._cv = False
        return None
    self._cv = True
    self._url = url
    try:
        self._img = soup.main.section.div.div.div.img["src"]
    except (AttributeError, TypeError, KeyError):
        # AttributeError: an intermediate tag is missing (attribute access on None).
        # TypeError: the <img> tag itself is missing (subscript on None).
        # KeyError: the <img> tag has no src attribute.
        # In every case the CV exists but has no usable picture: keep _img unchanged.
        return None
    return None
|
|
183
203
|
|
|
184
204
|
@property
|
|
185
205
|
def url(self):
|
|
206
|
+
if self._url is not None:
|
|
207
|
+
return self._url
|
|
186
208
|
if self.key_type == "pid":
|
|
187
209
|
return f"https://hal.science/search/index/?q=*&authIdPerson_i={self.key}"
|
|
188
210
|
elif self.key_type == "fullname":
|
|
@@ -190,6 +212,12 @@ class HALAuthor(Author, HAL):
|
|
|
190
212
|
else:
|
|
191
213
|
return f"https://hal.science/search/index/?q=*&authIdHal_s={self.key}"
|
|
192
214
|
|
|
215
|
+
@property
def img(self):
    """
    Picture source of the author.

    While ``_cv`` is still ``None``, :meth:`check_cv` is called first so that the
    cached fields are populated.

    Returns
    -------
    :class:`str` or None
        Current value of ``_img``.
    """
    needs_check = self._cv is None
    if needs_check:
        self.check_cv()
    return self._img
|
|
220
|
+
|
|
193
221
|
def get_publications(self):
|
|
194
222
|
return HAL.from_author(self)
|
|
195
223
|
|
|
@@ -241,8 +269,11 @@ HAL_KEYS = {
|
|
|
241
269
|
@dataclass(repr=False)
|
|
242
270
|
class HALPublication(Publication, HAL):
|
|
243
271
|
key: str
|
|
244
|
-
|
|
245
|
-
|
|
272
|
+
metadata: dict = field(default_factory=dict)
|
|
273
|
+
|
|
274
|
+
@property
def url(self):
    """
    Address of the publication, read from the metadata.

    Returns
    -------
    :class:`str` or None
        The ``"url"`` entry of ``metadata``, ``None`` when there is no such entry.
    """
    link = self.metadata.get("url")
    return link
|
|
246
277
|
|
|
247
278
|
@classmethod
|
|
248
279
|
def from_json(cls, r):
|
|
@@ -258,15 +289,20 @@ class HALPublication(Publication, HAL):
|
|
|
258
289
|
:class:`~gismap.sources.hal.HALPublication`
|
|
259
290
|
|
|
260
291
|
"""
|
|
261
|
-
|
|
292
|
+
keys = {v: unlist(r[k]) for k, v in HAL_KEYS.items() if k in r}
|
|
293
|
+
res = {k: keys[k] for k in ["key", "title", "year"]}
|
|
294
|
+
# res = {v: unlist(r[k]) for k, v in HAL_KEYS.items() if k in r}
|
|
262
295
|
res["authors"] = [
|
|
263
296
|
parse_facet_author(a) for a in r.get("authFullNamePersonIDIDHal_fs", [])
|
|
264
297
|
]
|
|
265
298
|
for tag in ["booktitle", "journal", "conference"]:
|
|
266
|
-
if tag in
|
|
267
|
-
res["venue"] =
|
|
299
|
+
if tag in keys:
|
|
300
|
+
res["venue"] = keys[tag]
|
|
268
301
|
break
|
|
269
302
|
else:
|
|
270
303
|
res["venue"] = "unpublished"
|
|
271
|
-
res["type"] = HAL_TYPES.get(
|
|
272
|
-
|
|
304
|
+
res["type"] = HAL_TYPES.get(keys["type"], keys["type"].lower())
|
|
305
|
+
res["metadata"] = {
|
|
306
|
+
k: keys[k] for k in {"abstract", "url"} if k in keys and keys[k]
|
|
307
|
+
}
|
|
308
|
+
return cls(**res)
|
gismap/sources/multi.py
CHANGED
|
@@ -6,11 +6,22 @@ from gismap.sources.models import Publication, Author
|
|
|
6
6
|
from gismap.utils.text import clean_aliases
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
9
|
+
def score_author_source(dbauthor):
    """
    Give a preference score to a database author entry (higher is preferred).

    Parameters
    ----------
    dbauthor: object
        Author entry exposing ``db_name`` and, for HAL entries, ``key_type``.

    Returns
    -------
    :class:`int`
        ``1`` for DBLP, ``0`` for an unknown database. For HAL: ``-1`` for a
        ``"fullname"`` key, ``2`` for a ``"pid"`` key, ``3`` for any other key type.
    """
    database = dbauthor.db_name
    if database == "dblp":
        return 1
    if database != "hal":
        return 0
    # From here on, only HAL entries: the score depends on the kind of key.
    kind = dbauthor.key_type
    if kind == "fullname":
        return -1
    return 2 if kind == "pid" else 3
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def sort_author_sources(sources):
    """
    Order author sources from most to least preferred.

    Parameters
    ----------
    sources: iterable
        Database author entries.

    Returns
    -------
    :class:`list`
        New list sorted by decreasing :func:`score_author_source`.
    """
    ranked = list(sources)
    ranked.sort(key=score_author_source, reverse=True)
    return ranked
|
|
14
25
|
|
|
15
26
|
|
|
16
27
|
@dataclass(repr=False)
|
|
@@ -35,10 +46,15 @@ class SourcedAuthor(Author):
|
|
|
35
46
|
|
|
36
47
|
@classmethod
def from_sources(cls, sources):
    """
    Build a sourced author out of its database entries.

    The entries are ordered with :func:`sort_author_sources`; the first one gives the name.

    Parameters
    ----------
    sources: :class:`list`
        Database author entries (at least one).

    Returns
    -------
    cls
        Author named after the best-ranked entry, holding the ordered entries as ``sources``.
    """
    ranked = sort_author_sources(sources)
    best = ranked[0]
    return cls(name=best.name, sources=ranked)
|
|
39
51
|
|
|
40
|
-
def get_publications(self, clean=True):
|
|
41
|
-
|
|
52
|
+
def get_publications(self, clean=True, selector=None):
|
|
53
|
+
if selector is None:
|
|
54
|
+
selector = []
|
|
55
|
+
res = {
|
|
56
|
+
p.key: p for a in self.sources for p in a.get_publications() if all(f(p) for f in selector)
|
|
57
|
+
}
|
|
42
58
|
if clean:
|
|
43
59
|
regroup_authors({self.key: self}, res)
|
|
44
60
|
return regroup_publications(res)
|
|
@@ -46,30 +62,49 @@ class SourcedAuthor(Author):
|
|
|
46
62
|
return res
|
|
47
63
|
|
|
48
64
|
|
|
65
|
+
# Per-attribute bonus tables used to rank the sources of a publication.
# The insertion order of the attributes is the order of the score components.
publication_score_rosetta = {
    "db_name": {"dblp": 1, "hal": 2},
    "venue": {"CoRR": -1, "unpublished": -2},
    "type": {"conference": 1, "journal": 2},
}


def score_publication_source(source):
    """
    Give a preference score to a publication source (higher is preferred).

    Parameters
    ----------
    source: object
        Publication entry. ``year`` is required; the attributes listed in
        ``publication_score_rosetta`` are optional.

    Returns
    -------
    :class:`tuple`
        One bonus per attribute of ``publication_score_rosetta`` (``0`` when the
        attribute is missing or its value has no bonus), followed by the year.
    """
    scores = []
    for attribute, bonus in publication_score_rosetta.items():
        scores.append(bonus.get(getattr(source, attribute, None), 0))
    return (*scores, source.year)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def sort_publication_sources(sources):
    """
    Order publication sources from most to least preferred.

    Parameters
    ----------
    sources: iterable
        Publication entries.

    Returns
    -------
    :class:`list`
        New list sorted by decreasing :func:`score_publication_source`.
    """
    ranked = list(sources)
    ranked.sort(key=score_publication_source, reverse=True)
    return ranked
|
|
82
|
+
|
|
83
|
+
|
|
49
84
|
@dataclass(repr=False)
class SourcedPublication(Publication):
    """
    Publication backed by one or several database entries.
    """

    # Database entries of the publication; from_sources stores them best-ranked first.
    sources: list = field(default_factory=list)

    @property
    def key(self):
        """
        Key of the first source, ``None`` when there is no source.
        """
        return self.sources[0].key if self.sources else None

    @classmethod
    def from_sources(cls, sources):
        """
        Build a sourced publication out of its database entries.

        The entries are ordered with :func:`sort_publication_sources`; the descriptive
        fields are copied from the first one.

        Parameters
        ----------
        sources: :class:`list`
            Publication entries (at least one).

        Returns
        -------
        cls
            Publication holding the ordered entries as ``sources``.
        """
        ranked = sort_publication_sources(sources)
        main = ranked[0]
        copied = ("title", "authors", "venue", "type", "year")
        return cls(sources=ranked, **{k: getattr(main, k) for k in copied})
|
|
66
107
|
|
|
67
|
-
@staticmethod
|
|
68
|
-
def score_source(source):
|
|
69
|
-
scores = [v.get(getattr(source, k, None), 0) for k, v in score_rosetta.items()]
|
|
70
|
-
scores.append(source.year)
|
|
71
|
-
return tuple(scores)
|
|
72
|
-
|
|
73
108
|
|
|
74
109
|
def regroup_authors(auth_dict, pub_dict):
|
|
75
110
|
"""
|
|
@@ -100,7 +135,7 @@ def regroup_authors(auth_dict, pub_dict):
|
|
|
100
135
|
pub.authors = [redirection.get(a.key, a) for a in pub.authors]
|
|
101
136
|
|
|
102
137
|
|
|
103
|
-
def regroup_publications(pub_dict, threshold=
|
|
138
|
+
def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
|
|
104
139
|
"""
|
|
105
140
|
Puts together copies of the same publication.
|
|
106
141
|
|
|
@@ -119,17 +154,19 @@ def regroup_publications(pub_dict, threshold=90, length_impact=0.2):
|
|
|
119
154
|
Unified publications.
|
|
120
155
|
"""
|
|
121
156
|
pub_list = [p for p in pub_dict.values()]
|
|
157
|
+
res = dict()
|
|
122
158
|
|
|
123
|
-
|
|
124
|
-
p.fit([paper.title for paper in pub_list])
|
|
159
|
+
if pub_list:
|
|
125
160
|
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
161
|
+
p = Process(length_impact=length_impact, n_range=n_range)
|
|
162
|
+
p.fit([paper.title for paper in pub_list])
|
|
163
|
+
|
|
164
|
+
done = np.zeros(len(pub_list), dtype=bool)
|
|
165
|
+
for i, paper in enumerate(pub_list):
|
|
166
|
+
if done[i]:
|
|
167
|
+
continue
|
|
168
|
+
locs = np.where(p.transform([paper.title])[0, :] > threshold)[0]
|
|
169
|
+
pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
|
|
170
|
+
res[pub.key] = pub
|
|
171
|
+
done[locs] = True
|
|
135
172
|
return res
|