gismap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/lab/lab.py ADDED
@@ -0,0 +1,152 @@
1
+ from gismo import MixInIO
2
+ from dataclasses import dataclass, field
3
+
4
+ from gismap.utils.common import LazyRepr
5
+ from gismap.utils.logger import logger
6
+ from gismap.sources.multi import SourcedAuthor
7
+ from gismap.sources.multi import regroup_authors, regroup_publications
8
+ from gismap.sources.hal import HAL
9
+ from gismap.sources.dblp import DBLP
10
+
11
+
12
@dataclass(repr=False)
class AuthorMetadata(LazyRepr):
    """
    Optional presentation details attached to an author.

    Every field is optional and defaults to ``None``.

    Attributes
    ----------

    url: :class:`str`
        Homepage of the author.
    img: :class:`str`
        Url to a picture.
    group: :class:`str`
        Group of the author.
    position: :class:`tuple`
        Coordinates of the author.
    """

    url: str = None
    img: str = None
    group: str = None
    position: tuple = None
34
+
35
+
36
@dataclass(repr=False)
class LabAuthor(SourcedAuthor):
    """
    :class:`~gismap.sources.multi.SourcedAuthor` carrying additional presentation metadata.

    Attributes
    ----------
    metadata: :class:`~gismap.lab.lab.AuthorMetadata`
        Optional information about the author; a fresh empty instance by default.
    """

    metadata: AuthorMetadata = field(default_factory=AuthorMetadata)

    def auto_sources(self, dbs=None):
        """
        Look the author's name up in each database and store what was found as sources.

        A warning is logged for every database that returns no entry or more than one entry.
        The ``sources`` attribute is only overwritten when at least one entry was found overall.

        Parameters
        ----------
        dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
            List of DB sources to use.

        Returns
        -------
        None
        """
        databases = [HAL, DBLP] if dbs is None else dbs
        collected = []
        for db in databases:
            matches = db.search_author(self.name)
            n_matches = len(matches)
            if n_matches == 0:
                logger.warning(f"{self.name} not found in {db.db_name}")
            elif n_matches > 1:
                logger.warning(f"Multiple entries for {self.name} in {db.db_name}")
            # Ambiguous matches are all kept; the warning above only flags them.
            collected += matches
        if collected:
            self.sources = collected
65
+
66
+
67
class Lab(MixInIO):
    """
    Abstract class for labs.

    Subclasses describe how to enumerate the lab members by implementing
    :meth:`_author_iterator`.

    Labs can be saved with the `dump` method and loaded with the `load` method.

    Parameters
    ----------
    name: :class:`str`
        Name of the lab. Can be set as class or instance attribute.
    dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
        List of DB sources to use.
    """

    name = None
    dbs = [HAL, DBLP]

    def __init__(self, name=None, dbs=None):
        # Only shadow the class-level defaults when a value is explicitly given.
        if name is not None:
            self.name = name
        if dbs is not None:
            self.dbs = dbs
        self.authors = None
        self.publications = None

    def __repr__(self):
        return f"Lab {self.name}"

    def _author_iterator(self):
        """
        Enumerate the members of the lab. Must be implemented by subclasses.

        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
        """
        raise NotImplementedError

    def update_authors(self):
        """
        Populate the authors attribute (:class:`dict` [:class:`str`, :class:`~gismap.lab.lab.LabAuthor`]).

        Each author produced by :meth:`_author_iterator` is looked up in the lab's databases;
        authors that end up without any source are left out.

        Returns
        -------
        None
        """
        self.authors = dict()
        for author in self._author_iterator():
            author.auto_sources(dbs=self.dbs)
            if author.sources:
                self.authors[author.key] = author

    def update_publis(self):
        """
        Populate the publications attribute (:class:`dict` [:class:`str`, :class:`~gismap.sources.multi.SourcedPublication`]).

        If the authors have not been populated yet, :meth:`update_authors` is called first.

        Returns
        -------
        None
        """
        if self.authors is None:
            # Publications are gathered author by author, so authors are a prerequisite.
            self.update_authors()
        pubs = dict()
        for author in self.authors.values():
            pubs.update(author.get_publications(clean=False))
        regroup_authors(self.authors, pubs)
        self.publications = regroup_publications(pubs)
130
+
131
+
132
class ListLab(Lab):
    """
    Simplest way to create a lab: with a list of names.

    Parameters
    ----------
    author_list: :class:`list` of :class:`str`
        List of authors names.
    args: :class:`list`
        Arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
    kwargs: :class:`dict`
        Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab` constructor.
    """

    def __init__(self, author_list, *args, **kwargs):
        self.author_list = author_list
        super().__init__(*args, **kwargs)

    def _author_iterator(self):
        """
        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
            One author per listed name, with empty metadata.
        """
        for author_name in self.author_list:
            empty_metadata = AuthorMetadata()
            yield LabAuthor(name=author_name, metadata=empty_metadata)
gismap/lab/lip6.py ADDED
@@ -0,0 +1,43 @@
1
+ from bs4 import BeautifulSoup as Soup
2
+ import re
3
+
4
+ from gismap.lab.lab import Lab, AuthorMetadata, LabAuthor
5
+ from gismap.utils.requests import get
6
+
7
+
8
class Lip6Lab(Lab):
    """
    Class for handling a LIP6 team using `https://www.lip6.fr/recherche/team_membres.php?acronyme=*team_acronym*` as entry point.
    Default to `NPA` team.
    """

    name = "NPA"

    def _author_iterator(self):
        """
        Scrape the team page and yield one author per non-empty link of its first table.

        Yields nothing when the page contains no table.

        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
            Author whose metadata holds the team name as group and, when available, a homepage url.
        """
        url = f"https://www.lip6.fr/recherche/team_membres.php?acronyme={self.name}"
        soup = Soup(get(url), "lxml")
        table = soup.table
        if table is None:
            # No member table on the page (e.g. unknown acronym): nothing to enumerate.
            return
        for a in table("a"):
            # Names may contain non-breaking spaces; normalize them to plain spaces.
            name = a.text.replace("\xa0", " ").strip()
            if not name:
                continue
            metadata = AuthorMetadata(group=self.name)
            # The homepage link, when present, is the sibling element right before the name link.
            previous = a.find_previous_sibling()
            if previous is not None and "user" in previous.get("class", []):
                metadata.url = previous["href"].strip()
            yield LabAuthor(name=name, metadata=metadata)
28
+
29
+
30
class Lip6(Lip6Lab):
    """
    Class for handling all LIP6 teams using `https://www.lip6.fr/informations/annuaire.php` to get team names.
    """

    name = "LIP6"

    def _author_iterator(self):
        """
        Enumerate the members of every team referenced in the LIP6 directory page.

        Each team acronym is processed once, in order of first appearance.

        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
        """
        groups = re.compile(r'acronyme=(.*?)[\'"]')
        directory = get("https://www.lip6.fr/informations/annuaire.php")
        # dict.fromkeys drops repeated acronyms while preserving order, so each team page is fetched once.
        for group in dict.fromkeys(groups.findall(directory)):
            if not group:
                # An empty acronym does not designate a team.
                continue
            yield from Lip6Lab(name=group)._author_iterator()
gismap/lab/toulouse.py ADDED
@@ -0,0 +1,47 @@
1
+ import re
2
+ from bs4 import BeautifulSoup as Soup
3
+ from gismap.lab.lab import Lab, AuthorMetadata, LabAuthor
4
+ from gismap.utils.requests import get
5
+
6
+
7
def name_changer(name, rosetta):
    """
    Translate a name through a correspondence table.

    Parameters
    ----------
    name: :class:`str`
        Name to translate.
    rosetta: :class:`dict`
        Mapping from names to their replacement.

    Returns
    -------
    :class:`str`
        The replacement if `name` is a key of `rosetta`, `name` itself otherwise.
    """
    return rosetta[name] if name in rosetta else name
9
+
10
+
11
class LaasLab(Lab):
    """
    Class for handling a LAAS team from its name.
    Default to `sara` team.
    """

    name = "sara"
    base_url = "https://www.laas.fr"
    # Names as displayed on the LAAS website -> names to use instead.
    rosetta = {"Urtzi Ayesta Morate": "Urtzi Ayesta"}

    def _author_iterator(self):
        """
        Scrape the team page and yield one author per link of the first ``membre`` block.

        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
            Author with a homepage url and, when the picture is a public avatar, an image url.
        """
        page = get(f"{self.base_url}/fr/equipes/{self.name}/")
        soup = Soup(page, features="lxml")
        members = soup("div", {"class": "membre"})[0]
        for link in members("a"):
            url = self.base_url + link["href"]
            # The displayed name is carried by the alt text of the member's picture.
            name = name_changer(link.img["alt"], self.rosetta)
            if "public_avatar" in link.img["class"]:
                img = self.base_url + link.img["src"]
            else:
                img = None
            metadata = AuthorMetadata(url=url, img=img)
            yield LabAuthor(name=name, metadata=metadata)
32
+
33
+
34
class Solace(Lab):
    """
    Class for handling the Solace team (`https://solace.cnrs.fr`).
    """

    name = "Solace"
    # Captures the beginning of each list item, up to the first comma, " (" or closing tag.
    regex = re.compile(r"<li>(.*?)(,| \(|</li>)")

    def _author_iterator(self):
        """
        Scrape the people page and yield one author per list item.

        Yields
        ------
        :class:`~gismap.lab.lab.LabAuthor`
            Author whose metadata holds the url of the item's link, if any.
        """
        html = get("https://solace.cnrs.fr/people.html")
        for fragment, _ in self.regex.findall(html):
            # The captured fragment may itself be markup (e.g. a link around the name).
            soup = Soup(fragment, features="lxml")
            link = soup.a
            url = link["href"] if link else None
            author_name = soup.text.strip()
            yield LabAuthor(name=author_name, metadata=AuthorMetadata(url=url))
gismap/lab/vis.py ADDED
@@ -0,0 +1,171 @@
1
+ from string import Template
2
+ import uuid
3
+ import json
4
+
5
+
6
+ vis_template = Template("""
7
+ <div id="${container_id}"></div>
8
+ <div id="${modal_id}" class="modal">
9
+ <div class="modal-content">
10
+ <span class="close" id="${modal_close_id}">&times;</span>
11
+ <div id="${modal_body_id}"></div>
12
+ </div>
13
+ </div>
14
+ <style>
15
+ /* Styles adaptatifs pour dark/light */
16
+ #${container_id} {
17
+ width: 1200px; height: 800px; border: 1px solid #444;
18
+ background: #181818;
19
+ }
20
+ .modal {
21
+ display: none; position: fixed; z-index: 1000; left: 0; top: 0; width: 100%; height: 100%;
22
+ overflow: auto; background-color: rgba(10,10,10,0.85);
23
+ }
24
+ .modal-content {
25
+ background-color: #23272e; color: #f0f0f0;
26
+ margin: 10% auto; padding: 24px; border: 1px solid #888;
27
+ width: 50%; border-radius: 8px;
28
+ box-shadow: 0 5px 15px rgba(0,0,0,.6);
29
+ }
30
+ .close {
31
+ color: #aaa; float: right; font-size: 28px; font-weight: bold; cursor: pointer;
32
+ }
33
+ .close:hover, .close:focus { color: #fff; text-decoration: none; cursor: pointer; }
34
+ @media (prefers-color-scheme: light) {
35
+ #${container_id} { background: #fff; border: 1px solid #ccc; }
36
+ .modal { background-color: rgba(220,220,220,0.85); }
37
+ .modal-content { background: #fff; color: #222; border: 1px solid #888; }
38
+ .close { color: #222; }
39
+ .close:hover, .close:focus { color: #555; }
40
+ }
41
+ </style>
42
+ <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
43
+ <script type="text/javascript">
44
+ (function() {
45
+ // Détection du thème
46
+ function getTheme() {
47
+ return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
48
+ }
49
+ function getVisOptions(theme) {
50
+ if (theme === 'dark') {
51
+ return {
52
+ nodes: {
53
+ shape: 'circle', size: 20,
54
+ font: { size: 16, color: '#f0f0f0' },
55
+ color: {
56
+ background: '#222e3c',
57
+ border: '#5d90f5',
58
+ highlight: { background: '#2f3d4d', border: '#f5a25d' }
59
+ },
60
+ borderWidth: 2
61
+ },
62
+ edges: {
63
+ width: 2,
64
+ color: { color: '#888', highlight: '#f5a25d' },
65
+ smooth: { type: 'continuous' }
66
+ },
67
+ interaction: { hover: true }
68
+ };
69
+ } else {
70
+ return {
71
+ nodes: {
72
+ shape: 'circle', size: 20,
73
+ font: { size: 16, color: '#222' },
74
+ color: {
75
+ background: '#e3eaff',
76
+ border: '#3d6cf7',
77
+ highlight: { background: '#fffbe6', border: '#f5a25d' }
78
+ },
79
+ borderWidth: 2
80
+ },
81
+ edges: {
82
+ width: 2,
83
+ color: { color: '#848484', highlight: '#f5a25d' },
84
+ smooth: { type: 'continuous' }
85
+ },
86
+ interaction: { hover: true }
87
+ };
88
+ }
89
+ }
90
+ const nodes = new vis.DataSet(${nodes_json});
91
+ const edges = new vis.DataSet(${edges_json});
92
+ const container = document.getElementById('${container_id}');
93
+ let network = null;
94
+ function renderNetwork() {
95
+ const theme = getTheme();
96
+ const options = getVisOptions(theme);
97
+ network = new vis.Network(container, { nodes: nodes, edges: edges }, options);
98
+ // Tooltip survol
99
+ network.on("hoverNode", function(params) {
100
+ const node = nodes.get(params.node);
101
+ network.body.container.title = node.hover || '';
102
+ });
103
+ network.on("blurNode", function(params) {
104
+ network.body.container.title = '';
105
+ });
106
+ network.on("hoverEdge", function(params) {
107
+ const edge = edges.get(params.edge);
108
+ network.body.container.title = edge.hover || '';
109
+ });
110
+ network.on("blurEdge", function(params) {
111
+ network.body.container.title = '';
112
+ });
113
+ // Modal overlay
114
+ const modal = document.getElementById('${modal_id}');
115
+ const modalBody = document.getElementById('${modal_body_id}');
116
+ const modalClose = document.getElementById('${modal_close_id}');
117
+ network.on("click", function(params) {
118
+ if (params.nodes.length === 1) {
119
+ const node = nodes.get(params.nodes[0]);
120
+ modalBody.innerHTML = node.overlay || '';
121
+ modal.style.display = "block";
122
+ } else if (params.edges.length === 1) {
123
+ const edge = edges.get(params.edges[0]);
124
+ modalBody.innerHTML = edge.overlay || '';
125
+ modal.style.display = "block";
126
+ } else {
127
+ modal.style.display = "none";
128
+ }
129
+ });
130
+ modalClose.onclick = function() { modal.style.display = "none"; };
131
+ window.onclick = function(event) {
132
+ if (event.target == modal) { modal.style.display = "none"; }
133
+ };
134
+ }
135
+ renderNetwork();
136
+ // Adapter dynamiquement si le thème change
137
+ window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', function() {
138
+ renderNetwork();
139
+ });
140
+ })();
141
+ </script>
142
+ """)
143
+
144
+
145
def generate_html(nodes, edges):
    """
    Build an HTML snippet that displays a graph with vis-network.

    The snippet is obtained by filling ``vis_template`` with the JSON version of the nodes
    and edges, and with element ids made unique by a random suffix.

    Parameters
    ----------
    nodes: :class:`list`
        JSON-serializable description of the nodes.
    edges: :class:`list`
        JSON-serializable description of the edges.

    Returns
    -------
    :class:`str`
        HTML code of the graph.
    """

    def script_safe_json(data):
        # The JSON ends up inside a <script> element: a raw "</script>" or "<!--" in any
        # string value would end or corrupt the script. "<" only occurs inside JSON strings,
        # where the \u003c escape denotes the very same character.
        return json.dumps(data).replace("<", "\\u003c")

    uid = str(uuid.uuid4())[:8]  # unique identifier to avoid collisions between snippets
    dico = {
        "container_id": f"mynetwork_{uid}",
        "modal_id": f"modal_{uid}",
        "modal_body_id": f"modal_body_{uid}",
        "modal_close_id": f"modal_close_{uid}",
        "nodes_json": script_safe_json(nodes),
        "edges_json": script_safe_json(edges),
    }
    return vis_template.substitute(dico)
gismap/search.py ADDED
@@ -0,0 +1,215 @@
1
+ from collections import defaultdict
2
+ from string import Template
3
+
4
+ from gismap.utils.text import reduce_keywords, Corrector
5
+
6
+
7
class SearchAction:
    """
    Blueprint for extracting search results out of a gismo.

    Subclasses implement :meth:`process`; :meth:`run` applies `post` to its output.

    Parameters
    ----------
    name: :class:`str`, optional
        Name of the action.
    post: callable, optional
        Transformation applied to the output of :meth:`process`. Identity by default.
    """

    def __init__(self, name=None, post=None):
        self.name = name
        if post is None:
            self.post = lambda x: x
        else:
            self.post = post

    def process(self, gismo):
        """Extract raw results from `gismo`. Must be implemented by subclasses."""
        raise NotImplementedError

    def run(self, gismo):
        """Return the post-processed results extracted from `gismo`."""
        raw = self.process(gismo)
        return self.post(raw)
21
+
22
+
23
def p2t(publis):
    """
    Convert publications to text.

    Parameters
    ----------
    publis: :class:`list`
        List of publications. Each one must expose a `string` attribute.

    Returns
    -------
    :class:`str`
        The `string` attributes of the publications, one per line.
    """
    lines = [publi.string for publi in publis]
    return "\n".join(lines)
36
+
37
+
38
def l2t(lis):
    """
    Convert a list of texts to a single text.

    Parameters
    ----------
    lis: :class:`list`
        List of text.

    Returns
    -------
    :class:`str`
        Concatenation, comma-separated.
    """
    separator = ", "
    return separator.join(lis)
51
+
52
+
53
class SearchDocuments(SearchAction):
    """
    Gives *k* best covering articles.

    Parameters
    ----------
    name: :class:`str`, default="articles"
        Name of the action.
    post: callable, optional
        Output transformation. Defaults to :func:`p2t`.
    k: :class:`int`, default=5
        Number of documents to ask for.
    """

    def __init__(self, name="articles", post=None, k=5):
        super().__init__(name=name, post=p2t if post is None else post)
        self.k = k

    def process(self, gismo):
        """Return what ``gismo.get_documents_by_coverage`` gives for *k* documents."""
        return gismo.get_documents_by_coverage(k=self.k)
64
+
65
+
66
class SearchFeatures(SearchAction):
    """
    Gives best keywords.

    Parameters
    ----------
    name: :class:`str`, default="keywords"
        Name of the action.
    post: callable, optional
        Output transformation. Defaults to :func:`l2t`.
    """

    def __init__(self, name="keywords", post=None):
        super().__init__(name=name, post=l2t if post is None else post)

    def process(self, gismo):
        """Return the ranked features of `gismo`, passed through ``reduce_keywords``."""
        ranked = gismo.get_features_by_rank()
        return reduce_keywords(ranked)
76
+
77
+
78
class SearchLandmarks(SearchAction):
    """
    Gives best landmarks.

    Parameters
    ----------
    name: :class:`str`, default="landmarks"
        Name of the action.
    post: callable, optional
        Output transformation. Defaults to :func:`l2t`.
    lmks: optional
        Landmarks object; it must provide ``get_landmarks_by_rank``.
    """

    def __init__(self, name="landmarks", post=None, lmks=None):
        super().__init__(name=name, post=l2t if post is None else post)
        self.lmks = lmks

    def process(self, gismo):
        """Return what ``lmks.get_landmarks_by_rank`` gives for `gismo`."""
        landmarks = self.lmks
        return landmarks.get_landmarks_by_rank(gismo)
89
+
90
+
91
class Search:
    """
    Builds a gismo search engine.

    Calling the instance with a query ranks the gismo on that query and, on success,
    runs every action on it.

    Parameters
    ----------
    gismo: :class:`~gismo.gismo.Gismo`
        Gismo to use.
    action_list: :class:`list`
        List of actions to perform.
    post: callable, optional
        Output transformation.
    corrector: :class:`bool`, default=True
        Implement word correction.
    """

    def __init__(self, gismo, action_list, post=None, corrector=True):
        self.gismo = gismo
        self.action_list = action_list
        if post is None:
            self.post = lambda x: x
        else:
            self.post = post
        # The corrector is built from the features of the gismo's embedding.
        self.corrector = Corrector(gismo.embedding.features) if corrector else None

    def __call__(self, query):
        """
        Parameters
        ----------
        query: :class:`str`
            Text to search.

        Returns
        -------
        Output of `post` applied to a :class:`dict` with keys ``query`` (possibly corrected),
        ``success`` and ``results`` (action name -> action output, empty on failure).
        """
        if self.corrector is not None:
            query = self.corrector(query)
        success = self.gismo.rank(query)
        if success:
            res = {action.name: action.run(self.gismo) for action in self.action_list}
        else:
            res = dict()
        raw = {"query": query, "success": success, "results": res}
        return self.post(raw)
125
+
126
+
127
def search_to_text(res):
    """
    Render search results as plain text.

    Parameters
    ----------
    res: :class:`dict`
        Raw results of search.

    Returns
    -------
    :class:`str`
        Text representation of the results: a failure message if the search did not succeed,
        a header followed by one line per result otherwise.
    """
    query = res["query"]
    if not res["success"]:
        return f"Failure: ``{query}'' not found!"
    parts = [f"Results for ``{query}'':\n"]
    parts.extend(f"Suggested {k}: {v}\n" for k, v in res["results"].items())
    return "".join(parts)
146
+
147
+
148
# HTML list item describing one publication; $hal and $dblp hold links (or nothing).
publi_template = Template("""
<li>
<i>$title</i>, by $authors. $venue, $year. $hal $dblp
</li>
""")


def publi_to_html(publi):
    """
    Render a publication as an HTML list item.

    Text coming from the publication (title, authors, venue, year, urls) is HTML-escaped
    so that characters such as ``<``, ``&`` or quotes cannot break the markup.

    Parameters
    ----------
    publi
        Publication. It must expose `title`, `venue`, `year`, `authors` (iterable of objects
        with a `name`) and `sources` (mapping from db name to an entry with a ``url`` key).

    Returns
    -------
    :class:`str`
        HTML representation of the publication.
    """
    from html import escape

    dico = dict()
    for db in ["hal", "dblp"]:
        source = publi.sources.get(db)
        if source:
            # quote=True also escapes the single quote that delimits the href attribute.
            href = escape(str(source["url"]), quote=True)
            dico[db] = f"<a href='{href}' target='_blank'>{db.upper()}</a>"
        else:
            dico[db] = ""
    dico["authors"] = ", ".join(escape(str(a.name)) for a in publi.authors)
    for key in ["title", "venue", "year"]:
        dico[key] = escape(str(getattr(publi, key)))
    return publi_template.substitute(dico)
167
+
168
+
169
def publis_to_html(publis):
    """
    Render publications as an HTML unordered list.

    Parameters
    ----------
    publis: :class:`list`
        List of publications.

    Returns
    -------
    :class:`str`
        ``<ul>`` element with one item per publication.
    """
    items = [publi_to_html(publi) for publi in publis]
    return "<ul>\n" + "\n".join(items) + "\n</ul>"
172
+
173
+
174
# HTML skeleton of a search report; placeholders without a value are rendered empty.
html_template = Template("""
<div>
<h4>Search: <i>$query</i></h4>
<div>
<h5>Associated keywords:</h5>
<div>$keywords</div>
</div>
<div>
<h5>Associated Projects:</h5>
<div>$projects</div>
</div>
<div>
<h5>Suggested people:</h5>
<div>$members</div>
</div>
<div>
<h5>Suggested publications:</h5>
<div>$publis</div>
</div>
</div>

""")


def search_to_html(res):
    """
    Render search results as HTML.

    Parameters
    ----------
    res: :class:`dict`
        Raw results of search. On success, the ``articles`` result (if any) is expected to be
        an iterable of publications.

    Returns
    -------
    :class:`str`
        HTML representation of the results.
    """
    # Missing entries default to an empty string, so unused placeholders vanish.
    dico = defaultdict(str, res["results"])
    dico["query"] = res["query"]
    if res["success"]:
        articles = dico["articles"]
        dico["publis"] = publis_to_html(articles)
    return html_template.safe_substitute(dico)
File without changes