gismap 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/build.py CHANGED
@@ -1,4 +1,26 @@
1
+ """
2
+ Build script for creating the LDB (Local DBLP) database.
3
+
4
+ Run as a module to download and process the DBLP dataset:
5
+
6
+ python -m gismap.build
7
+
8
+ This will fetch the DBLP RDF dump from the website and create a compressed local database.
9
+ """
10
+
1
11
  if __name__ == "__main__":
12
+ import argparse
2
13
  from gismap.sources.ldb import LDB
14
+
15
+ parser = argparse.ArgumentParser(
16
+ description="Build LDB database from DBLP TTL dump."
17
+ )
18
+ parser.add_argument(
19
+ "--no-search",
20
+ action="store_true",
21
+ help="Exclude search engine from output (for GitHub assets).",
22
+ )
23
+ args = parser.parse_args()
24
+
3
25
  LDB.build_db()
4
- LDB.dump_db()
26
+ LDB.dump_db(include_search=not args.no_search)
@@ -27,22 +27,37 @@ gislink = tags.a(
27
27
 
28
28
  def make_vis(lab, **kwargs):
29
29
  """
30
+ Generate HTML visualization of a lab's collaboration network.
31
+
30
32
  Parameters
31
33
  ----------
32
34
  lab: :class:`~gismap.lab.labmap.LabMap`
33
35
  Lab to display.
34
-
35
- Other parameters
36
- ----------------
37
- uid: :class:`str`
38
- Unique identifier.
39
- vis_url: :class:`str`
40
- Location of visJS network.
36
+ uid: :class:`str`, optional
37
+ Unique identifier for DOM elements. Auto-generated if not provided.
38
+ vis_url: :class:`str`, optional
39
+ URL to vis-network library.
40
+ groups: :class:`dict`, optional
41
+ Group styling configuration.
42
+ draw_legend: :class:`bool`, optional
43
+ Whether to draw the legend. Defaults to True if multiple groups.
44
+ physics: :class:`dict`, optional
45
+ Physics engine configuration.
46
+ nodes_options: :class:`dict`, optional
47
+ Node styling options.
48
+ edges_options: :class:`dict`, optional
49
+ Edge styling options.
50
+ interaction_options: :class:`dict`, optional
51
+ Interaction settings.
52
+ style: :class:`string.Template`, optional
53
+ CSS template.
54
+ script: :class:`string.Template`, optional
55
+ JavaScript template.
41
56
 
42
57
  Returns
43
58
  -------
44
59
  :class:`str`
45
- HTML code.
60
+ HTML code as a string.
46
61
  """
47
62
  uid = kwargs.pop("uid", None)
48
63
  if uid is None:
gismap/gisgraphs/graph.py CHANGED
@@ -21,6 +21,21 @@ def initials(name):
21
21
 
22
22
 
23
23
  def linkify(name, url):
24
+ """
25
+ Wrap a name in an HTML link if URL is provided.
26
+
27
+ Parameters
28
+ ----------
29
+ name : :class:`str`
30
+ Display text.
31
+ url : :class:`str` or None
32
+ Target URL, or None for no link.
33
+
34
+ Returns
35
+ -------
36
+ :class:`str`
37
+ HTML anchor tag or span.
38
+ """
24
39
  if url:
25
40
  return f'<a href="{url}" target="_blank">{name}</a>'
26
41
  else:
@@ -33,7 +33,7 @@ def safe_filename(name):
33
33
  return f"gismap-{safe_str[:60]}.html"
34
34
 
35
35
 
36
- place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand, ldb: 38/11269), Rim Kaddah, Leonardo Linguaglossa, Céline Comte"
36
+ place_holder = "Diego Perino, The-Dang Huynh, François Durand (hal: fradurand, dblp: 38/11269), Rim Kaddah, Leonardo Linguaglossa, Céline Comte"
37
37
 
38
38
 
39
39
  class GismapWidget:
@@ -66,7 +66,7 @@ class GismapWidget:
66
66
  layout=widgets.Layout(width="50%", height="100px"),
67
67
  )
68
68
  self.dbs = widgets.RadioButtons(
69
- options=["HAL", "LDB", "Both"],
69
+ options=["HAL", "DBLP", "Both"],
70
70
  description="DB(s):",
71
71
  layout=widgets.Layout(width="80px", max_width="20%"),
72
72
  )
@@ -97,12 +97,20 @@ class GismapWidget:
97
97
  self.show = True
98
98
 
99
99
  def html(self):
100
+ """
101
+ Generate the HTML visualization based on widget inputs.
102
+
103
+ Returns
104
+ -------
105
+ :class:`str`
106
+ HTML content for the collaboration graph.
107
+ """
100
108
  dbs = (
101
109
  "hal"
102
110
  if self.dbs.value == "HAL"
103
- else "ldb"
104
- if self.dbs.value == "LDB"
105
- else ["hal", "ldb"]
111
+ else "dblp"
112
+ if self.dbs.value == "DBLP"
113
+ else ["hal", "dblp"]
106
114
  )
107
115
  name = self.names.value
108
116
  pattern = r",\s*(?![^()]*\))"
@@ -128,6 +136,20 @@ class GismapWidget:
128
136
  return lab.html()
129
137
 
130
138
  def compute_function(self, b, show=True):
139
+ """
140
+ Handle compute button click and generate visualization.
141
+
142
+ Parameters
143
+ ----------
144
+ b : :class:`ipywidgets.Button`
145
+ The button widget that triggered this callback.
146
+ show : :class:`bool`, default=True
147
+ Whether to display the result in the widget.
148
+
149
+ Returns
150
+ -------
151
+ None
152
+ """
131
153
  self.show = show
132
154
  full = self.html()
133
155
  b64 = base64.b64encode(
@@ -135,9 +157,7 @@ class GismapWidget:
135
157
  ).decode("utf8")
136
158
  payload = f"data:text/html;base64,{b64}"
137
159
  savename = safe_filename(self.names.value)
138
- link_html = (
139
- f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
140
- )
160
+ link_html = f"<a href='{payload}' download='{savename}'>Download the Map!</a>"
141
161
  self.save_link.value = link_html
142
162
  if show:
143
163
  self.out.clear_output()
gismap/lab/egomap.py CHANGED
@@ -4,19 +4,27 @@ from gismap.lab.lab_author import LabAuthor
4
4
 
5
5
  class EgoMap(LabMap):
6
6
  """
7
+ Egocentric view of a researcher's collaboration network.
8
+
9
+ Displays the *star* (central researcher), their *planets* (direct co-authors),
10
+ and optionally *moons* (co-authors of co-authors).
11
+
7
12
  Parameters
8
13
  ----------
9
- star
10
- args
11
- kwargs
14
+ star: :class:`str` or :class:`~gismap.lab.lab_author.LabAuthor`
15
+ The central researcher. Can be a name string or LabAuthor object.
16
+ *args
17
+ Passed to :class:`~gismap.lab.labmap.LabMap`.
18
+ **kwargs
19
+ Passed to :class:`~gismap.lab.labmap.LabMap`.
12
20
 
13
21
  Examples
14
22
  --------
15
23
 
16
- >>> dang = EgoMap("The-Dang Huynh", dbs="hal")
17
- >>> dang.build(target=10)
18
- >>> sorted(a.name for a in dang.authors.values()) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
19
- ['Bruno Kauffmann', 'Chung Shue Chen', 'Fabien Mathieu',...
24
+ >>> dang = EgoMap("The-Dang Huynh")
25
+ >>> dang.build(target=20)
26
+ >>> sorted(a.name for a in dang.authors.values() if len(a.name.split())<3) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
27
+ ['Bruno Kauffmann', 'Diego Perino', 'Dohy Hong', 'Fabien Mathieu', 'François Baccelli',...]
20
28
  """
21
29
 
22
30
  def __init__(self, star, *args, **kwargs):
@@ -31,6 +39,20 @@ class EgoMap(LabMap):
31
39
  yield self.star
32
40
 
33
41
  def build(self, **kwargs):
42
+ """
43
+ Build the ego network by fetching publications and adding planets/moons.
44
+
45
+ Parameters
46
+ ----------
47
+ target : :class:`int`, default=50
48
+ Target number of authors in the final map.
49
+ **kwargs
50
+ Passed to :meth:`~gismap.lab.labmap.LabMap.expand`.
51
+
52
+ Returns
53
+ -------
54
+ None
55
+ """
34
56
  target = kwargs.pop("target", 50)
35
57
  self.update_authors(desc="Star metadata")
36
58
  self.update_publis(desc="Star publications")
gismap/lab/expansion.py CHANGED
@@ -139,7 +139,14 @@ def get_prospects(lab):
139
139
  @dataclass
140
140
  class Member:
141
141
  """
142
- Basic information
142
+ Basic information about a lab member for name matching.
143
+
144
+ Parameters
145
+ ----------
146
+ name : :class:`str`
147
+ Normalized name.
148
+ key : :class:`str`
149
+ Author key.
143
150
  """
144
151
 
145
152
  name: str
@@ -190,6 +197,33 @@ def trim_sources(author):
190
197
  def proper_prospects(
191
198
  lab, length_impact=0.05, threshold=80, n_range=4, max_new=None, trim=True
192
199
  ):
200
+ """
201
+ Find and rank external collaborators for potential lab expansion.
202
+
203
+ Identifies authors from publications who are not already lab members,
204
+ groups them by name similarity, and ranks by collaboration strength.
205
+
206
+ Parameters
207
+ ----------
208
+ lab : :class:`~gismap.lab.labmap.LabMap`
209
+ Reference lab.
210
+ length_impact : :class:`float`, default=0.05
211
+ Length impact for name similarity matching.
212
+ threshold : :class:`int`, default=80
213
+ Similarity threshold for grouping authors.
214
+ n_range : :class:`int`, default=4
215
+ N-gram range for name comparison.
216
+ max_new : :class:`int`, optional
217
+ Maximum number of new authors to return.
218
+ trim : :class:`bool`, default=True
219
+ If True, keep only one source per database for each author.
220
+
221
+ Returns
222
+ -------
223
+ :class:`tuple`
224
+ (existing, new_rosetta) where existing maps external keys to lab member keys,
225
+ and new_rosetta maps source keys to new LabAuthor objects.
226
+ """
193
227
  member_names = get_member_names(lab)
194
228
  prospects = get_prospects(lab)
195
229
 
gismap/lab/lab_author.py CHANGED
@@ -56,6 +56,7 @@ class LabAuthor(SourcedAuthor):
56
56
  >>> dummy2.sources
57
57
  [HALAuthor(name='My Name', key='key1'), HALAuthor(name='My Name', key='123456', key_type='pid'), HALAuthor(name='My Name', key='My Other Name', key_type='fullname')]
58
58
  """
59
+
59
60
  metadata: AuthorMetadata = field(default_factory=AuthorMetadata)
60
61
 
61
62
  def auto_img(self):
@@ -116,12 +117,41 @@ class LabAuthor(SourcedAuthor):
116
117
 
117
118
 
118
119
  def labify_author(author, rosetta):
120
+ """
121
+ Convert a database author to a LabAuthor if possible.
122
+
123
+ Parameters
124
+ ----------
125
+ author : :class:`~gismap.sources.models.Author`
126
+ Author to convert.
127
+ rosetta : :class:`dict`
128
+ Mapping from keys/names to LabAuthor objects.
129
+
130
+ Returns
131
+ -------
132
+ :class:`~gismap.lab.lab_author.LabAuthor` or original author
133
+ LabAuthor if found in rosetta, otherwise the original author.
134
+ """
119
135
  if isinstance(author, LabAuthor):
120
136
  return author
121
137
  return rosetta.get(author.key, rosetta.get(author.name, author))
122
138
 
123
139
 
124
140
  def labify_publications(pubs, rosetta):
141
+ """
142
+ Convert publication authors to LabAuthors in place.
143
+
144
+ Parameters
145
+ ----------
146
+ pubs : :class:`list`
147
+ Publications to update.
148
+ rosetta : :class:`dict`
149
+ Mapping from keys/names to LabAuthor objects.
150
+
151
+ Returns
152
+ -------
153
+ None
154
+ """
125
155
  for pub in pubs:
126
156
  pub.authors = [labify_author(a, rosetta) for a in pub.authors]
127
157
  for source in getattr(pub, "sources", []):
gismap/lab/labmap.py CHANGED
@@ -91,7 +91,9 @@ class LabMap(MixInIO):
91
91
  if not all(f(author) for f in self.author_selectors):
92
92
  continue
93
93
  if len(author.sources) == 0:
94
- author.auto_sources(dbs=list_of_objects(self.dbs, db_dict, default=default_dbs))
94
+ author.auto_sources(
95
+ dbs=list_of_objects(self.dbs, db_dict, default=default_dbs)
96
+ )
95
97
  if author.sources:
96
98
  self.authors[author.key] = author
97
99
  if author.metadata.img is None:
@@ -152,9 +154,36 @@ class LabMap(MixInIO):
152
154
  return None
153
155
 
154
156
  def html(self, **kwargs):
157
+ """
158
+ Generate HTML representation of the collaboration graph.
159
+
160
+ Parameters
161
+ ----------
162
+ **kwargs
163
+ Passed to :func:`~gismap.gisgraphs.builder.make_vis`.
164
+
165
+ Returns
166
+ -------
167
+ :class:`str`
168
+ HTML content as a string.
169
+ """
155
170
  return make_vis(self, **kwargs)
156
171
 
157
172
  def save_html(self, name=None, **kwargs):
173
+ """
174
+ Save the collaboration graph as an HTML file.
175
+
176
+ Parameters
177
+ ----------
178
+ name: :class:`str`, optional
179
+ Output filename. Defaults to lab name.
180
+ **kwargs
181
+ Passed to :meth:`html`.
182
+
183
+ Returns
184
+ -------
185
+ None
186
+ """
158
187
  if name is None:
159
188
  name = self.name
160
189
  name = Path(name).with_suffix(".html")
@@ -162,6 +191,18 @@ class LabMap(MixInIO):
162
191
  f.write(self.html(**kwargs))
163
192
 
164
193
  def show_html(self, **kwargs):
194
+ """
195
+ Display the collaboration graph in a Jupyter notebook.
196
+
197
+ Parameters
198
+ ----------
199
+ **kwargs
200
+ Passed to :meth:`html`.
201
+
202
+ Returns
203
+ -------
204
+ None
205
+ """
165
206
  display(HTML(self.html(**kwargs)))
166
207
 
167
208
 
@@ -174,9 +215,9 @@ class ListMap(LabMap):
174
215
  author_list: :class:`list` of :class:`str`
175
216
  List of authors names.
176
217
  args: :class:`list`
177
- Arguments to pass to the :class:`~gismap.lab.lab.Lab` constuctor.
218
+ Arguments to pass to the :class:`~gismap.lab.labmap.LabMap` constructor.
178
219
  kwargs: :class:`dict`
179
- Keyword arguments to pass to the :class:`~gismap.lab.lab.Lab` constuctor.
220
+ Keyword arguments to pass to the :class:`~gismap.lab.labmap.LabMap` constructor.
180
221
  """
181
222
 
182
223
  def __init__(self, author_list, *args, **kwargs):
@@ -10,37 +10,50 @@ class CedricMap(LabMap):
10
10
  Class for handling a CNAM Cedric team from its name.
11
11
  Defaults to the `roc` team.
12
12
  """
13
+
13
14
  name = "roc"
14
15
  base_url = "https://cedric.cnam.fr"
15
16
 
16
17
  def _author_iterator(self):
17
18
  soup = Soup(get(f"{self.base_url}/equipes/{self.name}/"), features="lxml")
18
- searchers = [li.a for ul in soup.find('div', {'id': 'annuaire'})('ul')[:3] for li in ul('li')]
19
+ searchers = [
20
+ li.a
21
+ for ul in soup.find("div", {"id": "annuaire"})("ul")[:3]
22
+ for li in ul("li")
23
+ ]
19
24
  done = set()
20
25
  for searcher in searchers:
21
- name = searcher.text.split('(')[0].strip()
26
+ name = searcher.text.split("(")[0].strip()
22
27
  if name in done:
23
28
  continue
24
29
  url = f"{self.base_url}{searcher['href']}"
25
30
  sousoup = Soup(get(url), features="lxml")
26
- img = sousoup.find('img', {'class': 'photo'})['src']
31
+ img = sousoup.find("img", {"class": "photo"})["src"]
27
32
  response = requests.head(img, allow_redirects=True)
28
33
  if int(response.headers.get("Content-Length")) < 3000:
29
34
  img = None
30
35
  done.add(name)
31
- yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
36
+ yield LabAuthor(
37
+ name=name,
38
+ metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
39
+ )
32
40
 
33
41
 
34
42
  class CedricFull(LabMap):
35
43
  """
36
44
  Class for handling all CNAM Cedric teams using `https://cedric.cnam.fr/equipes` to get team names.
37
45
  """
46
+
38
47
  name = "Cedric"
39
48
 
40
49
  def _author_iterator(self):
41
50
  base = "https://cedric.cnam.fr/equipes/"
42
- soup = Soup(get(base), features="lxml")
43
- teams = {a['href'].split('/')[-2] for a in soup('a') if base in a.get('href', "") and len(a['href'])>len(base)}
51
+ soup = Soup(get(base), features="lxml")
52
+ teams = {
53
+ a["href"].split("/")[-2]
54
+ for a in soup("a")
55
+ if base in a.get("href", "") and len(a["href"]) > len(base)
56
+ }
44
57
  for team in teams:
45
58
  for author in CedricMap(name=team)._author_iterator():
46
59
  yield author
@@ -18,8 +18,8 @@ def lamsade_parse(div):
18
18
  :class:`tuple`
19
19
  name, image url (or None), webpage (or None)
20
20
  """
21
- img = div.img['src'] if div.img else None
22
- url = div.a['href'] if div.a else None
21
+ img = div.img["src"] if div.img else None
22
+ url = div.a["href"] if div.a else None
23
23
  name = div.h2.text.strip().title()
24
24
  name = " ".join(name.split(" ", 1)[::-1])
25
25
  return name, img, url
@@ -35,9 +35,11 @@ class Lamsade(LabMap):
35
35
  directory = "fr/personnes/enseignants-chercheurs-et-chercheurs.html"
36
36
 
37
37
  def _author_iterator(self):
38
- soup = Soup(get(self.base_url+self.directory), features="lxml")
39
- for a in soup('div', class_="dauphinecv-item"):
38
+ soup = Soup(get(self.base_url + self.directory), features="lxml")
39
+ for a in soup("div", class_="dauphinecv-item"):
40
40
  name, img, url = lamsade_parse(a)
41
- img = self.base_url+img if img else None
42
- url = self.base_url+url if url else None
43
- yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name))
41
+ img = self.base_url + img if img else None
42
+ url = self.base_url + url if url else None
43
+ yield LabAuthor(
44
+ name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name)
45
+ )
@@ -29,18 +29,22 @@ class LaasMap(LabMap):
29
29
  if "public_avatar" in a.img["class"]
30
30
  else None
31
31
  )
32
- yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
32
+ yield LabAuthor(
33
+ name=name,
34
+ metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()),
35
+ )
33
36
 
34
37
 
35
38
  class LaasFull(LabMap):
36
39
  """
37
40
  Class for handling all LAAS teams using `https://www.laas.fr/fr/equipes/` to get team names.
38
41
  """
42
+
39
43
  name = "LAAS"
40
44
 
41
45
  def _author_iterator(self):
42
46
  soup = Soup(get("https://www.laas.fr/fr/equipes/"), features="lxml")
43
- teams = [a['href'].split('/')[-2] for a in soup('a', {'class': "badge"})]
47
+ teams = [a["href"].split("/")[-2] for a in soup("a", {"class": "badge"})]
44
48
  for team in teams:
45
49
  for author in LaasMap(name=team)._author_iterator():
46
50
  yield author
gismap/search.py CHANGED
@@ -6,7 +6,17 @@ from gismap.utils.text import reduce_keywords, Corrector
6
6
 
7
7
  class SearchAction:
8
8
  """
9
- Blueprint for extracting search results out of a gismo.
9
+ Base class for extracting search results from a Gismo.
10
+
11
+ Subclasses should implement :meth:`process` to define
12
+ how to extract results from the gismo.
13
+
14
+ Parameters
15
+ ----------
16
+ name : :class:`str`, optional
17
+ Name of this action (used as key in results dict).
18
+ post : callable, optional
19
+ Post-processing function applied to results.
10
20
  """
11
21
 
12
22
  def __init__(self, name=None, post=None):
@@ -14,9 +24,33 @@ class SearchAction:
14
24
  self.post = (lambda x: x) if post is None else post
15
25
 
16
26
  def process(self, gismo):
27
+ """
28
+ Extract results from the gismo. Must be implemented by subclasses.
29
+
30
+ Parameters
31
+ ----------
32
+ gismo : :class:`~gismo.gismo.Gismo`
33
+ The gismo to query.
34
+
35
+ Returns
36
+ -------
37
+ Results (type depends on subclass).
38
+ """
17
39
  raise NotImplementedError
18
40
 
19
41
  def run(self, gismo):
42
+ """
43
+ Execute the action and apply post-processing.
44
+
45
+ Parameters
46
+ ----------
47
+ gismo : :class:`~gismo.gismo.Gismo`
48
+ The gismo to query.
49
+
50
+ Returns
51
+ -------
52
+ Post-processed results.
53
+ """
20
54
  return self.post(self.process(gismo))
21
55
 
22
56
 
@@ -153,6 +187,19 @@ publi_template = Template("""
153
187
 
154
188
 
155
189
  def publi_to_html(publi):
190
+ """
191
+ Convert a publication to an HTML list item.
192
+
193
+ Parameters
194
+ ----------
195
+ publi : :class:`~gismap.sources.models.Publication`
196
+ Publication to convert.
197
+
198
+ Returns
199
+ -------
200
+ :class:`str`
201
+ HTML list item string.
202
+ """
156
203
  dico = dict()
157
204
  for db in ["hal", "dblp"]:
158
205
  source = publi.sources.get(db)
@@ -167,6 +214,19 @@ def publi_to_html(publi):
167
214
 
168
215
 
169
216
  def publis_to_html(publis):
217
+ """
218
+ Convert a list of publications to an HTML unordered list.
219
+
220
+ Parameters
221
+ ----------
222
+ publis : :class:`list`
223
+ List of publications.
224
+
225
+ Returns
226
+ -------
227
+ :class:`str`
228
+ HTML unordered list string.
229
+ """
170
230
  rows = "\n".join(publi_to_html(p) for p in publis)
171
231
  return f"<ul>\n{rows}\n</ul>"
172
232
 
gismap/sources/dblp.py CHANGED
@@ -98,6 +98,7 @@ class DBLPAuthor(Author, DBLP):
98
98
  key='conf/sss/Mathieu07')
99
99
 
100
100
  """
101
+
101
102
  key: str
102
103
  aliases: list = field(default_factory=list)
103
104
 
@@ -124,6 +125,27 @@ DBLP_TYPES = {
124
125
 
125
126
  @dataclass(repr=False)
126
127
  class DBLPPublication(Publication, DBLP):
128
+ """
129
+ Publication from the DBLP database.
130
+
131
+ Parameters
132
+ ----------
133
+ title: :class:`str`
134
+ Publication title.
135
+ authors: :class:`list`
136
+ List of :class:`DBLPAuthor` objects.
137
+ venue: :class:`str`
138
+ Publication venue.
139
+ type: :class:`str`
140
+ Publication type.
141
+ year: :class:`int`
142
+ Publication year.
143
+ key: :class:`str`
144
+ DBLP record key.
145
+ metadata: :class:`dict`
146
+ Additional metadata (pages, volume, etc.).
147
+ """
148
+
127
149
  key: str
128
150
  metadata: dict = field(default_factory=dict)
129
151