gismap 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,119 @@
1
+ from string import Template
2
+
3
+
4
+ # language=css
5
+ default_style = Template("""
6
+
7
+ #box-$uid {
8
+ position: relative;
9
+ width: 100% !important;
10
+ height: 80vh !important;
11
+ max-width: 100vw !important;
12
+ max-height: 80vh !important;
13
+ min-height: 80vh;
14
+ color: #111;
15
+ }
16
+
17
+ #vis-$uid {
18
+ height: 100%; /* Make the inner div fill the parent */
19
+ width: 100%; /* Make the inner div fill the parent */
20
+ box-sizing: border-box;
21
+ border: 1px solid #444;
22
+ }
23
+
24
+ html[data-theme="dark"] #vis-$uid {
25
+ background-color: var(--pst-color-background, #14181e);
26
+ }
27
+
28
+ .modal {
29
+ display: none;
30
+ position: fixed;
31
+ z-index: 1000;
32
+ left: 0;
33
+ top: 0;
34
+ width: 100%;
35
+ height: 100%;
36
+ overflow: auto;
37
+ background-color: rgba(10, 10, 10, 0.85);
38
+ }
39
+
40
+ .modal-content {
41
+ background-color: #f4f4f7;
42
+ color: #222235;
43
+ margin: 10% auto;
44
+ padding: 24px;
45
+ border: 1px solid #888;
46
+ width: 50%;
47
+ border-radius: 8px;
48
+ box-shadow: 0 5px 15px rgba(0, 0, 0, .6);
49
+ }
50
+ .modal a {color: #2958d7;}
51
+ .modal a:visited {color: #8435a8;}
52
+
53
+ .close {
54
+ color: #777;
55
+ float: right;
56
+ font-size: 28px;
57
+ font-weight: bold;
58
+ cursor: pointer;
59
+ }
60
+
61
+ .close:hover, .close:focus {
62
+ color: #aaa;
63
+ text-decoration: none;
64
+ cursor: pointer;
65
+ }
66
+
67
+ .watermark {
68
+ position: absolute;
69
+ text-decoration: none;
70
+ color: #888;
71
+ font-size: min(2vw, 10px);
72
+ z-index: 10;
73
+ }
74
+
75
+ .gislink {
76
+ left: 10px;
77
+ bottom: 10px;
78
+ pointer-events: auto;
79
+ }
80
+
81
+ .button {
82
+ background: none;
83
+ border: none;
84
+ padding: 0;
85
+ margin: 0;
86
+ cursor: pointer;
87
+ }
88
+
89
+ .redraw {
90
+ left: 10px;
91
+ top: 10px;
92
+ }
93
+
94
+ .fullscreen {
95
+ bottom: 10px;
96
+ right: 10px;
97
+ }
98
+
99
+ .legend {
100
+ display: inline-block;
101
+ padding: 10px 16px;
102
+ border-radius: 8px;
103
+ box-shadow: 0 2px 8px 0 rgba(0, 0, 0, 0.10);
104
+ border: 1px solid var(--legend-border, #bbb);
105
+ background: var(--jp-layout-color1, #f5f5fa);
106
+ background-color: var(--legend-bg, rgba(240, 240, 245, 0.95));
107
+ position: absolute;
108
+ top: 12px;
109
+ right: 12px;
110
+ z-index: 20;
111
+ }
112
+
113
+ .legend-entry {
114
+ display: flex;
115
+ margin-right: 10px;
116
+ align-items: center;
117
+ cursor: pointer;
118
+ }
119
+ """)
@@ -0,0 +1,145 @@
1
+ import re
2
+ import unicodedata
3
+ import base64
4
+ from IPython.display import display, HTML
5
+ import ipywidgets as widgets
6
+ from contextlib import contextmanager
7
+
8
+ from gismap.lab.egomap import EgoMap
9
+ from gismap.lab.labmap import ListMap
10
+
11
+
12
@contextmanager
def dummy_context():
    """
    No-op context manager.

    It is meant for places where a ``with`` block is required but nothing
    has to be set up or torn down.

    Yields
    ------
    None
    """
    yield None
15
+
16
+
17
def safe_filename(name):
    """
    Turn an arbitrary string into a GisMap HTML filename.

    Parameters
    ----------
    name: :class:`str`
        Pretty much anything.

    Returns
    -------
    :class:`str`
        GisMap filename: ``gismap-`` followed by at most 60 ASCII letters,
        digits or underscores, then ``.html``.
    """
    # NFKD splits accented letters into base letter + combining mark, so that
    # dropping the non-ASCII part keeps the base letter.
    decomposed = unicodedata.normalize("NFKD", name)
    ascii_bytes = decomposed.encode("ascii", "ignore")
    underscored = ascii_bytes.decode("ascii").replace(" ", "_")
    stem = re.sub(r"[^a-zA-Z0-9_]", "", underscored)[:60]
    return f"gismap-{stem}.html"
34
+
35
+
36
+ place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand, dblp: 38/11269), Rim Kaddah, Leonardo Linguaglossa, Céline Comte"
37
+
38
+
39
class GismapWidget:
    """
    A simple widget to test the production of LabMaps and EgoMaps.

    The widget is displayed as soon as it is created. A single name builds an
    ``EgoMap``; several comma-separated names build a ``ListMap``.

    Examples
    --------

    This is a doctest example. Use a notebook to play with the widget.

    >>> gw = GismapWidget()  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
    VBox(children=(HTML(value=''), Output(), HBox(children=(Textarea(value='', ...
    >>> gw.names.value = "Fabien Mathieu"
    >>> gw.dbs.value = "HAL"
    >>> gw.size.value = 3
    >>> gw.compute_function(gw.compute, show=False)
    >>> gw.save_link.value[:30]
    "<a href='data:text/html;base64"
    >>> gw.names.value = "Diego Perino, Laurent Viennot"
    >>> gw.compute_function(gw.compute, show=False)
    >>> gw.save_link.value[:30]
    "<a href='data:text/html;base64"
    """

    def __init__(self):
        # Free-text area for the author name(s), comma-separated.
        self.names = widgets.Textarea(
            placeholder=place_holder,
            description="Name(s):",
            layout=widgets.Layout(width="50%", height="100px"),
        )
        # Database selector.
        self.dbs = widgets.RadioButtons(
            options=["HAL", "DBLP", "Both"],
            description="DB(s):",
            layout=widgets.Layout(width="80px", max_width="20%"),
        )
        # Size value handed to the map construction.
        self.size = widgets.IntSlider(
            value=10,
            min=1,
            max=150,
            step=1,
            description="Size",
            layout=widgets.Layout(width="250px"),
        )
        self.compute = widgets.Button(
            description="Map!", layout=widgets.Layout(width="120px", max_width="140px")
        )
        self._col = widgets.VBox(
            [self.size, self.compute],
            layout=widgets.Layout(
                align_items="center", max_width="27%", overflow="hidden"
            ),
        )
        # Holds the download link once a map has been computed.
        self.save_link = widgets.HTML(value="")
        self.compute.on_click(self.compute_function)
        self.out = widgets.Output()
        self.widget = widgets.VBox(
            [self.save_link, self.out, widgets.HBox([self.names, self._col, self.dbs])]
        )
        # Finish initialising the state before the widget becomes visible.
        self.show = True
        display(self.widget)

    def html(self):
        """
        Build the map described by the current widget values.

        Returns
        -------
        :class:`str`
            Result of the ``html()`` method of the map that was built.
        """
        dbs = (
            "hal"
            if self.dbs.value == "HAL"
            else "dblp"
            if self.dbs.value == "DBLP"
            else ["hal", "dblp"]
        )
        name = self.names.value
        # Split on commas, except those located inside parentheses
        # (parentheses carry per-author key/values that also use commas).
        pattern = r",\s*(?![^()]*\))"
        names = [n.strip() for n in re.split(pattern, name)]
        self.save_link.value = ""
        # When not showing, run the construction outside of the output area.
        ctx = self.out if self.show else dummy_context()
        if len(names) > 1:
            lab = ListMap(names, dbs=dbs, name="planet")
            if self.show:
                self.out.clear_output()
            with ctx:
                lab.update_authors()
                lab.update_publis()
                # Only expand if the requested size exceeds the listed authors.
                extra = self.size.value - len(lab.authors)
                if extra > 0:
                    lab.expand(target=extra)
        else:
            lab = EgoMap(names[0], dbs=dbs)
            if self.show:
                self.out.clear_output()
            with ctx:
                lab.build(target=self.size.value)
        return lab.html()

    def compute_function(self, b, show=True):
        """
        Compute the map, publish a download link, and optionally display it.

        Parameters
        ----------
        b
            Object passed by the ``on_click`` callback; not used.
        show: :class:`bool`, default=True
            If True, the map is rendered in the output area of the widget.

        Returns
        -------
        None
        """
        self.show = show
        full = self.html()
        # The page is UTF-8 encoded: declare it so that non-ASCII characters
        # (e.g. accented names) are decoded correctly when the saved file is
        # opened on its own.
        page = f"<html><head><meta charset='utf-8'></head><body>{full}</body></html>"
        b64 = base64.b64encode(page.encode("utf8")).decode("utf8")
        payload = f"data:text/html;base64,{b64}"
        savename = safe_filename(self.names.value)
        link_html = (
            f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
        )
        self.save_link.value = link_html
        if show:
            self.out.clear_output()
            with self.out:
                display(HTML(full))
gismap/gismo.py CHANGED
@@ -11,7 +11,7 @@ def make_post_publi(lab):
11
11
 
12
12
  Parameters
13
13
  ----------
14
- lab: :class:`~gismap.lab.lab.Lab`
14
+ lab: :class:`~gismap.lab.labmap.LabMap`
15
15
  Lab that contains the corpus publications.
16
16
 
17
17
  Returns
@@ -32,7 +32,7 @@ def make_gismo(lab, vectorizer_parameters=None):
32
32
 
33
33
  Parameters
34
34
  ----------
35
- lab: :class:`~gismap.lab.lab.Lab`
35
+ lab: :class:`~gismap.lab.labmap.LabMap`
36
36
  Lab that contains publications.
37
37
  vectorizer_parameters: :class:`dict`
38
38
  Overriding parameters for the Countvectorizer of the gismo.
gismap/lab/__init__.py CHANGED
@@ -1,10 +1,6 @@
1
- from gismap.lab.lab import (
2
- Lab as Lab,
3
- ListLab as ListLab,
1
+ from gismap.lab.labmap import (
2
+ LabMap as Map,
3
+ ListMap as ListMap,
4
4
  )
5
- from gismap.lab.lip6 import Lip6 as Lip6, Lip6Lab as Lip6Lab
6
- from gismap.lab.toulouse import Solace as Solace, LaasLab as LaasLab
7
- from gismap.lab.graph import lab2graph as lab2graph
8
- from gismap.lab.vis import generate_html as generate_html
9
5
  from gismap.lab.egomap import EgoMap as EgoMap
10
6
  from gismap.lab.lab_author import LabAuthor as LabAuthor
gismap/lab/egomap.py CHANGED
@@ -1,12 +1,12 @@
1
- from gismap.lab.lab import Lab
1
+ from gismap.lab.labmap import LabMap
2
2
  from gismap.lab.lab_author import LabAuthor
3
3
 
4
4
 
5
- class EgoMap(Lab):
5
+ class EgoMap(LabMap):
6
6
  """
7
7
  Parameters
8
8
  ----------
9
- sun
9
+ star
10
10
  args
11
11
  kwargs
12
12
 
@@ -15,28 +15,27 @@ class EgoMap(Lab):
15
15
 
16
16
  >>> dang = EgoMap("The-Dang Huynh", dbs="hal")
17
17
  >>> dang.build(target=10)
18
- >>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE
19
- ['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu', 'François Baccelli', 'Laurent Viennot', 'Ludovic Noirie',
20
- 'Siu-Wai Ho', 'Sébastien Tixeuil', 'The-Dang Huynh', 'Yannick Carlinet']
18
+ >>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
19
+ ['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu',...
21
20
  """
22
21
 
23
- def __init__(self, sun, *args, **kwargs):
24
- if isinstance(sun, str):
25
- sun = LabAuthor(sun)
26
- sun.metadata.position = (0, 0)
27
- self.sun = sun
22
+ def __init__(self, star, *args, **kwargs):
23
+ if isinstance(star, str):
24
+ star = LabAuthor(star)
25
+ star.metadata.position = (0, 0)
26
+ star.metadata.group = "star"
27
+ self.star = star
28
28
  super().__init__(*args, **kwargs)
29
29
 
30
30
  def _author_iterator(self):
31
- yield self.sun
31
+ yield self.star
32
32
 
33
33
  def build(self, **kwargs):
34
34
  target = kwargs.pop("target", 50)
35
- group = kwargs.pop("group", "moon")
36
- self.update_authors(desc="Sun metadata")
37
- self.update_publis(desc="Sun publications")
35
+ self.update_authors(desc="Star metadata")
36
+ self.update_publis(desc="Star publications")
37
+ kwargs["target"] = target - len(self.authors)
38
+ self.expand(group="planet", desc="Planets", **kwargs)
38
39
  kwargs["target"] = target - len(self.authors)
39
- self.expand(group=None, desc="Planets", **kwargs)
40
- kwargs.update({"target": target - len(self.authors), "group": group})
41
40
  if kwargs["target"] > 0:
42
- self.expand(desc="Moons", **kwargs)
41
+ self.expand(group="moon", desc="Moons", **kwargs)
gismap/lab/expansion.py CHANGED
@@ -53,7 +53,7 @@ def count_prospect_entries(lab):
53
53
 
54
54
  Parameters
55
55
  ----------
56
- lab: :class:`~gismap.lab.lab.Lab`
56
+ lab: :class:`~gismap.lab.labmap.LabMap`
57
57
  Reference lab.
58
58
 
59
59
  Returns
@@ -73,8 +73,8 @@ def count_prospect_entries(lab):
73
73
  count_publications.append(a.key)
74
74
  else:
75
75
  lab_authors.add(a.key)
76
- for l in lab_authors:
77
- count_coauthors[l].update(new_authors)
76
+ for a in lab_authors:
77
+ count_coauthors[a].update(new_authors)
78
78
 
79
79
  count_coauthors = Counter(
80
80
  k for new_authors in count_coauthors.values() for k in new_authors
@@ -117,7 +117,7 @@ def get_prospects(lab):
117
117
  """
118
118
  Parameters
119
119
  ----------
120
- lab: :class:`~gismap.lab.lab.Lab`
120
+ lab: :class:`~gismap.lab.labmap.LabMap`
121
121
  Reference lab.
122
122
 
123
123
  Returns
@@ -150,7 +150,7 @@ def get_member_names(lab):
150
150
  """
151
151
  Parameters
152
152
  ----------
153
- lab: :class:`~gismap.lab.lab.Lab`
153
+ lab: :class:`~gismap.lab.labmap.LabMap`
154
154
  Reference lab.
155
155
 
156
156
  Returns
@@ -222,12 +222,13 @@ def proper_prospects(
222
222
  locs = [j for j in np.where(jc[i, :] > threshold)[0] if not done[j]]
223
223
  done[locs] = True
224
224
  sources = sort_author_sources([prospects[j].author for j in locs])
225
- strength = sum(prospects[j].score for j in locs)
226
- new_author = LabAuthor.from_sources(sources)
227
- new_lab.append((strength, new_author))
225
+ if sources:
226
+ strength = sum(prospects[j].score for j in locs)
227
+ new_author = LabAuthor.from_sources(sources)
228
+ new_lab.append((strength, new_author))
228
229
 
229
230
  # Extract top prospects
230
- new_lab = [l[1] for l in sorted(new_lab, key=lambda l: l[0], reverse=True)][
231
+ new_lab = [a[1] for a in sorted(new_lab, key=lambda a: a[0], reverse=True)][
231
232
  :max_new
232
233
  ]
233
234
  new_rosetta = {s.key: a for a in new_lab for s in a.sources}
gismap/lab/filters.py CHANGED
@@ -1,18 +1,93 @@
1
- def taboos(txt, words):
1
+ import re
2
+
3
+ # editorials = re.compile(r"ditorial|foreword", re.IGNORECASE)
4
+ # charlatans = re.compile(r"Raoult|Kofman|Buob")
5
+
6
+ editorials = ["ditorial", "Foreword", "Brief Announcement", "Preface", "Préface"]
7
+ charlatans = ["Raoult", "Kofman", "Buob"]
8
+
9
+
10
def re_filter(words):
    """
    Build a filter that rejects texts containing taboo word(s).

    Parameters
    ----------
    words: :class:`list` or :class:`str`
        Word(s) to filter. Each word is used as a regular expression.
        An empty list filters nothing.

    Returns
    -------
    callable
        Filter function: takes a text, returns True if no word is found in it.
    """
    if isinstance(words, str):
        pattern = words
    else:
        words = list(words)
        if not words:
            # Joining nothing would give the empty pattern, which matches
            # every text and would therefore reject everything.
            return lambda txt: True
        pattern = "|".join(words)
    taboo = re.compile(pattern)
    return lambda txt: taboo.search(txt) is None
27
+
28
+
29
def publication_size_filter(n_max=9):
    """
    Build a filter that keeps publications with a bounded number of authors.

    Parameters
    ----------
    n_max: int, default=9
        Maximum number of co-authors allowed.

    Returns
    -------
    callable
        Filter on number of co-authors.
    """

    def small_enough(publication):
        return len(publication.authors) <= n_max

    return small_enough
42
+
43
+
44
def publication_oneword_filter(n_min=2):
    """
    Build a filter that keeps publications whose title has enough words.

    Parameters
    ----------
    n_min: int, default=2
        Minimum number of words required in the title.

    Returns
    -------
    callable
        Filter on number of words required in the title.
    """

    def long_enough(publication):
        # Words are the whitespace-separated chunks of the title.
        word_count = len(publication.title.split())
        return word_count >= n_min

    return long_enough
6
58
 
7
- def publication_size_filter(n_max=10):
8
- return lambda p: len(p.authors) < n_max
9
59
 
10
60
def publication_taboo_filter(w=None):
    """
    Build a filter that rejects publications whose title contains taboo words.

    Parameters
    ----------
    w: :class:`list`, optional
        List of words to filter. Defaults to the module-level ``editorials``.

    Returns
    -------
    Callable
        Filter function on publications.
    """
    title_is_clean = re_filter(editorials if w is None else w)

    def keep(publication):
        return title_is_clean(publication.title)

    return keep
76
+
14
77
 
15
78
def author_taboo_filter(w=None):
    """
    Build a filter that rejects authors whose name contains taboo words.

    Parameters
    ----------
    w: :class:`list`, optional
        List of words to filter. Defaults to the module-level ``charlatans``.

    Returns
    -------
    Callable
        Filter function on authors.
    """
    name_is_clean = re_filter(charlatans if w is None else w)

    def keep(author):
        return name_is_clean(author.name)

    return keep
gismap/lab/lab_author.py CHANGED
@@ -1,7 +1,8 @@
1
1
  from dataclasses import dataclass, field
2
+ import re
2
3
 
3
4
  from gismap import get_classes, HAL, DBLP
4
- from gismap.sources.models import DB
5
+ from gismap.sources.models import DB, db_class_to_auth_class
5
6
  from gismap.sources.multi import SourcedAuthor, sort_author_sources
6
7
  from gismap.utils.common import LazyRepr, list_of_objects
7
8
  from gismap.utils.logger import logger
@@ -26,6 +27,8 @@ class AuthorMetadata(LazyRepr):
26
27
  Group of the author.
27
28
  position: :class:`tuple`
28
29
  Coordinates of the author.
30
+ keys: :class:`dict`
31
+ Some DB key values of the author.
29
32
  """
30
33
 
31
34
  url: str = None
@@ -36,6 +39,26 @@ class AuthorMetadata(LazyRepr):
36
39
 
37
40
  @dataclass(repr=False)
38
41
  class LabAuthor(SourcedAuthor):
42
+ """
43
+ Examples
44
+ ---------
45
+ The metadata and DB key(s) of an author can be entered in parentheses using key/values.
46
+
47
+ Improper key/values are ignored (with a warning).
48
+
49
+
50
+ >>> dummy= LabAuthor("My Name(img: https://my.url.img, group:me,url:https://mysite.org,hal:key1,dblp:toto,badkey:hello,no_colon_separator)")
51
+ >>> dummy.metadata
52
+ AuthorMetadata(url='https://mysite.org', img='https://my.url.img', group='me')
53
+ >>> dummy.sources
54
+ [HALAuthor(name='My Name', key='key1'), DBLPAuthor(name='My Name', key='toto')]
55
+
56
+ You can enter multiple keys for the same DB. HAL key types are automatically detected.
57
+
58
+ >>> dummy2= LabAuthor("My Name (hal:key1,hal:123456,hal: My Other Name )")
59
+ >>> dummy2.sources
60
+ [HALAuthor(name='My Name', key='key1'), HALAuthor(name='My Name', key='123456', key_type='pid'), HALAuthor(name='My Name', key='My Other Name', key_type='fullname')]
61
+ """
39
62
  metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
40
63
 
41
64
  def auto_img(self):
@@ -45,6 +68,30 @@ class LabAuthor(SourcedAuthor):
45
68
  self.metadata.img = img
46
69
  break
47
70
 
71
+ def __post_init__(self):
72
+ pattern = r"\s*([^,(]+)\s*(?:\(([^)]*)\))?\s*$"
73
+ match = re.match(pattern, self.name)
74
+ if match:
75
+ self.name = match.group(1).strip()
76
+ content = match.group(2)
77
+ if content:
78
+ for kv in content.split(","):
79
+ if ":" not in kv:
80
+ logger.warning(f"I don't know what to do with {kv}.")
81
+ continue
82
+ k, v = kv.split(":", 1)
83
+ k = k.strip().lower()
84
+ v = v.strip()
85
+ if k in db_dict:
86
+ DBAuthor = db_class_to_auth_class(db_dict[k])
87
+ self.sources.append(DBAuthor(name=self.name, key=v))
88
+ elif k in ["url", "img", "group"]:
89
+ setattr(self.metadata, k, v)
90
+ else:
91
+ logger.warning(f"I don't know what to do with {kv}.")
92
+ else:
93
+ self.name = self.name.strip()
94
+
48
95
  def auto_sources(self, dbs=None):
49
96
  """
50
97
  Automatically populate the sources based on author's name.
@@ -63,9 +110,9 @@ class LabAuthor(SourcedAuthor):
63
110
  for db in dbs:
64
111
  source = db.search_author(self.name)
65
112
  if len(source) == 0:
66
- logger.warning(f"{self.name} not found in {db.db_name}")
113
+ logger.info(f"{self.name} not found in {db.db_name}")
67
114
  elif len(source) > 1:
68
- logger.warning(f"Multiple entries for {self.name} in {db.db_name}")
115
+ logger.info(f"Multiple entries for {self.name} in {db.db_name}")
69
116
  sources += source
70
117
  if len(sources) > 0:
71
118
  self.sources = sort_author_sources(sources)
@@ -74,7 +121,7 @@ class LabAuthor(SourcedAuthor):
74
121
def labify_author(author, rosetta):
    """
    Return the lab version of an author when the rosetta knows one.

    Parameters
    ----------
    author
        Author to convert. A ``LabAuthor`` is returned unchanged.
    rosetta: :class:`dict`
        Mapping looked up by the author's key first, then by the author's name.

    Returns
    -------
    The rosetta entry if one is found, otherwise the author itself.
    """
    if not isinstance(author, LabAuthor):
        key = author.key
        # Second choice: an entry under the name; last resort: the author.
        fallback = rosetta.get(author.name, author)
        return rosetta.get(key, fallback)
    return author
78
125
 
79
126
 
80
127
  def labify_publications(pubs, rosetta):