gismap 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/__init__.py ADDED
@@ -0,0 +1,20 @@
1
+ """Top-level package for Analytical Lab Cartography In Computer Science."""
2
+
3
+ from importlib.metadata import metadata
4
+
5
+ from gismap.utils.common import get_classes as get_classes
6
+ from gismap.gismo import make_gismo as make_gismo
7
+ from gismap.search import (
8
+ Search as Search,
9
+ SearchDocuments as SearchDocuments,
10
+ SearchLandmarks as SearchLandmarks,
11
+ SearchFeatures as SearchFeatures,
12
+ search_to_html as search_to_html,
13
+ search_to_text as search_to_text,
14
+ )
15
+
16
+
17
# Package metadata, looked up through importlib.metadata under this module's
# own name (``__name__``).
infos = metadata(__name__)
# Version string taken from the "Version" field of that metadata.
__version__ = infos["Version"]
__author__ = """Fabien Mathieu"""
__email__ = "loufab@gmail.com"
gismap/author.py ADDED
File without changes
gismap/gismap.py ADDED
@@ -0,0 +1 @@
1
+ """Main module."""
gismap/gismo.py ADDED
@@ -0,0 +1,379 @@
1
+ from sklearn.feature_extraction.text import CountVectorizer
2
+
3
+ from gismo.corpus import Corpus
4
+ from gismo.embedding import Embedding
5
+ from gismo.gismo import Gismo
6
+
7
+
8
def make_post_publi(lab):
    """
    Build the hook that converts a corpus entry (a publication key) into the publication itself.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Lab whose ``publications`` mapping is indexed by the keys stored in the corpus.

    Returns
    -------
    callable
        Function taking a gismo-like object and an index, and returning
        ``lab.publications[g.corpus[i]]``.
    """

    def to_bib(gismo, index):
        # The lab's publications are looked up at call time, not captured at hook creation.
        return lab.publications[gismo.corpus[index]]

    return to_bib
27
+
28
+
29
def make_gismo(lab, vectorizer_parameters=None):
    """
    Build a gismo from the publications of a lab.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        Lab that contains publications.
    vectorizer_parameters: :class:`dict`
        Parameters overriding the defaults given to the CountVectorizer of the gismo.

    Returns
    -------
    gismo: :class:`~gismo.gismo.Gismo`
        Gismo of the lab.
    """
    # Default vectorizer settings; caller-supplied values take precedence.
    vectorizer_kwargs = dict(ngram_range=(1, 3), dtype=float, stop_words=sw, min_df=3)
    if vectorizer_parameters is not None:
        vectorizer_kwargs.update(vectorizer_parameters)

    # The corpus stores publication keys; the lab turns a key into text.
    publication_keys = list(lab.publications)
    corpus = Corpus(publication_keys, to_text=lab.publi_to_text)

    embedding = Embedding(vectorizer=CountVectorizer(**vectorizer_kwargs))
    embedding.fit_transform(corpus)

    result = Gismo(corpus, embedding)
    # Make document results come back as publications rather than keys.
    result.post_documents_item = make_post_publi(lab)
    return result
55
+
56
+
57
# Extra stop words (English and French) used to build ``sw`` below.
# NOTE(review): several entries look like concatenated web-page text
# (e.g. "scholarchargement") -- their origin is not visible here; confirm
# before pruning.
stop_words = [
    "01",
    "20plus",
    "academia",
    "academic",
    "academy",
    "académie",
    "acm",
    "activities",
    "actualités",
    "adresse",
    "advances",
    "affichertoutesdepuis",
    "ajouter",
    "an",
    "ancitations",
    "and",
    "annual",
    "annéetrier",
    "antipolis",
    "are",
    "article",
    "articledisponiblesnon",
    "articles",
    "articles0",
    "arxiv",
    "as",
    "astérisque",
    "at",
    "attended",
    "au",
    "auteuradresse",
    "auteurnouveaux",
    "auteurnouvelles",
    "aux",
    "award",
    "awarded",
    "awards",
    "base",
    "based",
    "be",
    "been",
    "bibliography",
    "bibliothèquemétriquesalertesparamètresconnexionconnexionobtenir",
    "board",
    "book",
    "born",
    "by",
    "california",
    "called",
    "can",
    "celles",
    "centrale",
    "centre",
    "ces",
    "cet",
    "cette",
    "chair",
    "chaussées",
    "ciarlet",
    "citations",
    "citationstrier",
    "cited",
    "citée",
    "cnrs",
    "cnrsadresse",
    "coauteurscoauteurssuivre",
    "collaboration",
    "colloquium",
    "collège",
    "columbia",
    "committee",
    "comptabilisées",
    "conference",
    "contact",
    "contributions",
    "council",
    "cours",
    "course",
    "courses",
    "dans",
    "de",
    "des",
    "diegoadresse",
    "différentes",
    "director",
    "disponiblessur",
    "doi",
    "données",
    "doubleles",
    "du",
    "décompte",
    "early",
    "earned",
    "ecole",
    "ed",
    "edinburgh",
    "elected",
    "en",
    "english",
    "ens",
    "envoi",
    "et",
    "europaea",
    "european",
    "events",
    "exigences",
    "fellow",
    "financementcoauteurstout",
    "for",
    "formerly",
    "forum",
    "fr",
    "france",
    "franceadresse",
    "français",
    "françois",
    "french",
    "from",
    "fusionnéesle",
    "fusionnés",
    "google",
    "grenoble",
    "gérard",
    "habilitation",
    "had",
    "he",
    "her",
    "here",
    "highly",
    "his",
    "home",
    "hong",
    "honorary",
    "ici",
    "ieee",
    "imag",
    "in",
    "inclut",
    "informatique",
    "innovation",
    "inria",
    "insa",
    "institut",
    "international",
    "invited",
    "is",
    "isbn",
    "it",
    "je",
    "jean",
    "jour",
    "journal",
    "july",
    "known",
    "kong",
    "la",
    "lab",
    "le",
    "lecture",
    "les",
    "lille",
    "liées",
    "liés",
    "lncs",
    "lyon",
    "mail",
    "maintenant",
    "medal",
    "media",
    "member",
    "mises",
    "mon",
    "monde",
    "my",
    "mécanique",
    "national",
    "ne",
    "nombre",
    "normale",
    "notes",
    "notificationsokmon",
    "novel",
    "of",
    "olivier",
    "on",
    "opération",
    "ordre",
    "page",
    "pages",
    "paper",
    "par",
    "paraccès",
    "parcitée",
    "paris",
    "paristech",
    "partout",
    "pas",
    "pdf",
    "peut",
    "peuvent",
    "peux",
    "ph",
    "phd",
    "pierre",
    "plus",
    "polytechnique",
    "ponts",
    "pour",
    "pp",
    "premier",
    "preprint",
    "president",
    "prix",
    "prize",
    "proceedings",
    "professor",
    "profil",
    "profilcitée",
    "profilma",
    "programme",
    "programmes",
    "propre",
    "présentation",
    "publications",
    "publiccoauteurstitretriertrier",
    "published",
    "que",
    "qui",
    "received",
    "recherche",
    "record",
    "report",
    "research",
    "researcher",
    "réaliser",
    "réessayer",
    "résultatsaideconfidentialitéconditions",
    "saclayadresse",
    "san",
    "scholar",
    "scholarchargement",
    "school",
    "sciences",
    "scientifique",
    "scientist",
    "selected",
    "senior",
    "she",
    "sigmod",
    "silver",
    "site",
    "slides",
    "sont",
    "sophia",
    "sorbonne",
    "southern",
    "speaker",
    "springer",
    "stanford",
    "sud",
    "suivants",
    "suivies",
    "summer",
    "supervision",
    "supérieure",
    "supérieureadresse",
    "sur",
    "symposium",
    "système",
    "tard",
    "temps",
    "texas",
    "that",
    "the",
    "then",
    "theses",
    "titrecitée",
    "to",
    "transactions",
    "travaux",
    "télécom",
    "un",
    "under",
    "une",
    "univ",
    "university",
    "universityadresse",
    "université",
    "upmc",
    "usa",
    "using",
    "validée",
    "verimag",
    "verlag",
    "veuillez",
    "via",
    "vol",
    "was",
    "with",
    "won",
    "worked",
    "www",
    "year",
    "école",
    "être",
    "towards",
    "paradigm",
    "we",
    "this",
    "which",
    "our",
    "proposed",
    "their",
    "approach",
    "each",
    "such",
    "show",
    "what",
    "nous",
]

# Stop-word list used as the default ``stop_words`` vectorizer parameter in
# make_gismo: the words above plus the decimal strings "0" through "2099".
sw = stop_words + [str(i) for i in range(2100)]
gismap/lab/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ from gismap.lab.lab import (
2
+ Lab as Lab,
3
+ ListLab as ListLab,
4
+ LabAuthor as LabAuthor,
5
+ AuthorMetadata as AuthorMetadata,
6
+ )
7
+ from gismap.lab.lip6 import Lip6 as Lip6, Lip6Lab as Lip6Lab
8
+ from gismap.lab.toulouse import Solace as Solace, LaasLab as LaasLab
9
+ from gismap.lab.graph import lab2graph as lab2graph
10
+ from gismap.lab.vis import generate_html as generate_html
gismap/lab/graph.py ADDED
@@ -0,0 +1,234 @@
1
+ import numpy as np
2
+ from collections import defaultdict
3
+ from itertools import combinations
4
+ from gismap.lab.vis import generate_html
5
+
6
+
7
def initials(name):
    """
    Compute the two-letter initials of a person's name.

    The result is the first letter of the first word followed by the first letter
    of the last word. A single-word name therefore yields the same letter twice.

    Parameters
    ----------
    name: :class:`str`
        Person's name.

    Returns
    -------
    :class:`str`
        Person's initials (2 letters), or an empty string if the name contains no word.
    """
    words = name.split()
    # An empty or whitespace-only name has no words: return no initials instead of
    # failing on the index lookups below.
    if not words:
        return ""
    return words[0][0] + words[-1][0]
21
+
22
+
23
def author_to_html(author):
    """
    Render an author as HTML, hyperlinked when a URL can be found.

    The URL is looked up as follows: the author's own ``url`` attribute is the
    starting value; if the author has a ``metadata`` attribute, a non-empty
    ``metadata.url`` replaces it; otherwise, if the author's first source has a
    ``url`` attribute, that value replaces it.

    Parameters
    ----------
    author: :class:`~gismap.sources.models.Author`
        Author to render. Any object works; a missing ``name`` is rendered as
        "Unknown Author".

    Returns
    -------
    :class:`str`
        HTML string with URL if applicable, the plain name otherwise.
    """
    name = getattr(author, "name", "Unknown Author")
    # Direct URL attribute (optional).
    url = getattr(author, "url", None)
    # Authors carrying metadata (e.g. LabAuthor): prefer metadata.url, then the first source.
    if hasattr(author, "metadata"):
        meta_url = getattr(author.metadata, "url", None)
        if meta_url:
            url = meta_url
        else:
            # The source list may be missing or empty: only look at the first
            # source when there is one.
            sources = getattr(author, "sources", None)
            if sources and hasattr(sources[0], "url"):
                url = sources[0].url
    # NOTE(review): name and url are inserted without HTML escaping -- confirm
    # that upstream values are trusted.
    if url:
        return f'<a href="{url.strip()}" target="_blank">{name}</a>'
    return name
48
+
49
+
50
def publication_to_html(pub):
    """
    Render a publication as a one-line HTML citation.

    Parameters
    ----------
    pub: :class:`~gismap.sources.models.Publication`
        Publication. ``title`` is required; ``url``, ``authors``, ``venue`` and
        ``year`` are used when present.

    Returns
    -------
    :class:`str`
        HTML string with hyperlinks where applicable.
    """
    # The title becomes a link only when the publication has a non-empty URL.
    link = getattr(pub, "url", None)
    title_html = (
        f'<a href="{link}" target="_blank">{pub.title}</a>' if link else pub.title
    )

    # Authors keep their original order, comma-separated.
    authors_html = ", ".join(
        author_to_html(author) for author in getattr(pub, "authors", [])
    )

    # Missing venue/year are rendered as empty strings.
    venue, year = getattr(pub, "venue", ""), getattr(pub, "year", "")

    citation = f"{title_html}, by <i>{authors_html}</i>. {venue}, {year}."
    return citation.strip()
80
+
81
+
82
def publications_list_html(publications, n=10):
    """
    Render publications as an HTML unordered list, hiding the overflow behind a toggle.

    Parameters
    ----------
    publications: :class:`list` of :class:`~gismap.sources.models.Publication`
        Publications to display.
    n: :class:`int`, default=10
        Number of publications shown upfront. Further publications are emitted hidden,
        and a *Show more* link is appended to reveal them.

    Returns
    -------
    :class:`str`
        HTML of the list.
    """
    # Items past the first n carry a class and an inline style that hides them.
    list_items = [
        f"<li>{item}</li>"
        if rank < n
        else f'<li class="extra-publication" style="display:none;">{item}</li>'
        for rank, item in enumerate(publication_to_html(pub) for pub in publications)
    ]
    ul_content = "\n".join(list_items)

    show_more_part = ""
    if len(publications) > n:
        # "Show more" link: its inline script reveals the hidden items, then hides itself.
        show_more_part = (
            "\n"
            "<li>\n"
            '<a href="#" onclick="\n'
            "var elts = this.parentElement.parentElement.querySelectorAll('.extra-publication');\n"
            "for (var i = 0; i < elts.length; ++i) {elts[i].style.display = 'list-item';}\n"
            "this.parentElement.style.display = 'none';\n"
            'return false;">Show more…</a>\n'
            "</li>\n"
        )

    return "<ul>\n" + ul_content + "\n" + show_more_part + "\n</ul>\n"
126
+
127
+
128
def to_node(s, node_pubs):
    """
    Build the graph node describing a lab author.

    Parameters
    ----------
    s: :class:`~gismap.lab.lab.LabAuthor`
        Searcher.
    node_pubs: :class:`dict`
        Publications of each searcher, indexed by searcher key.

    Returns
    -------
    :class:`dict`
        A display-ready representation of the searcher.
    """
    meta = s.metadata
    overlay = (
        f"<div> Publications of {author_to_html(s)}:</div>"
        f"<div>{publications_list_html(node_pubs[s.key])}</div>"
    )
    node = {
        "id": s.key,
        "hover": f"Click for details on {s.name}.",
        "overlay": overlay,
        "group": meta.group,
    }
    # Without a picture, the node falls back to a text label made of the initials.
    if not meta.img:
        node["label"] = initials(s.name)
    else:
        node["image"] = meta.img
        node["shape"] = "circularImage"
    # An explicit position sets the node coordinates and marks it as fixed.
    if meta.position:
        x, y = meta.position
        node["x"] = x
        node["y"] = y
        node["fixed"] = True
    return node
156
+
157
+
158
def to_edge(k, v, searchers):
    """
    Build the graph edge describing the collaboration between two searchers.

    Parameters
    ----------
    k: :class:`tuple`
        Keys of the searchers involved.
    v: :class:`list`
        List of joint publications.
    searchers: :class:`dict`
        Searchers, indexed by key.

    Returns
    -------
    :class:`dict`
        A display-ready representation of the collaboration edge.
    """
    first, second = searchers[k[0]], searchers[k[1]]
    # Logarithmic strength: more joint publications give a wider and shorter edge.
    strength = 1 + np.log2(len(v))
    hover = f"Show joint publications from {first.name} and {second.name}"
    overlay = (
        f"<div> Joint publications from {author_to_html(first)} and {author_to_html(second)}:</div>"
        f"<div>{publications_list_html(v)}</div>"
    )
    return {
        "from": k[0],
        "to": k[1],
        "hover": hover,
        "overlay": overlay,
        "width": int(strength),
        "length": int(200 / strength),
    }
183
+
184
+
185
def lab2graph(lab):
    """
    Build the HTML collaboration graph of a lab.

    Nodes are the lab authors that share at least one publication with another lab
    author; edges carry the joint publications of each pair.

    Parameters
    ----------
    lab: :class:`~gismap.lab.lab.Lab`
        A lab populated with searchers and publications.

    Returns
    -------
    :class:`str`
        Collaboration graph.

    Examples
    --------

    >>> from gismap.lab import ListLab
    >>> lab = ListLab(author_list=['Tixeuil Sébastien', 'Mathieu Fabien'], name='mini')
    >>> lab.update_authors()
    >>> lab.update_publis()
    >>> len(lab.authors)
    2
    >>> len(lab.publications)
    453
    >>> html = lab2graph(lab)
    >>> html[:80]  # doctest: +ELLIPSIS
    '\\n<div id="mynetwork_..."></div>\\n<div id="modal_..." class="modal">\\n <'
    """
    node_pubs = {key: [] for key in lab.authors}
    edges_dict = defaultdict(list)

    for publi in lab.publications.values():
        # Indexing by key removes authors listed twice on the same publication.
        unique = {}
        for author in publi.authors:
            if author.__class__.__name__ == "LabAuthor":
                unique[author.key] = author
        # Sorting by key gives every pair of authors a single, consistent edge key.
        lab_authors = sorted(unique.values(), key=lambda a: str(a.key))
        for author in lab_authors:
            node_pubs[author.key].append(publi)
        for first, second in combinations(lab_authors, 2):
            edges_dict[first.key, second.key].append(publi)

    # Only authors taking part in at least one edge are displayed.
    connected = set()
    for pair in edges_dict:
        connected.update(pair)

    # Most recent publications first, for nodes and edges alike.
    for pubs in node_pubs.values():
        pubs.sort(key=lambda p: -p.year)
    for pubs in edges_dict.values():
        pubs.sort(key=lambda p: -p.year)

    nodes = [to_node(s, node_pubs) for s in lab.authors.values() if s.key in connected]
    edges = [to_edge(pair, pubs, lab.authors) for pair, pubs in edges_dict.items()]
    return generate_html(nodes=nodes, edges=edges)