gismap 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gismap/lab/vis.py CHANGED
@@ -13,39 +13,143 @@ vis_template = Template("""
13
13
  </div>
14
14
  <style>
15
15
  /* Styles adaptatifs pour dark/light */
16
+ /* Default: dark mode styles */
17
+ #${container_id} {
18
+ width: 100%;
19
+ height: 80vh !important;
20
+ max-width: 100vw;
21
+ max-height: 100vh !important;
22
+ box-sizing: border-box;
23
+ border: 1px solid #444;
24
+ background: #181818;
25
+ }
26
+
27
+ .modal {
28
+ display: none;
29
+ position: fixed;
30
+ z-index: 1000;
31
+ left: 0; top: 0;
32
+ width: 100%; height: 100%;
33
+ overflow: auto;
34
+ background-color: rgba(10,10,10,0.85);
35
+ }
36
+
37
+ .modal-content {
38
+ background-color: #23272e;
39
+ color: #f0f0f0;
40
+ margin: 10% auto;
41
+ padding: 24px;
42
+ border: 1px solid #888;
43
+ width: 50%;
44
+ border-radius: 8px;
45
+ box-shadow: 0 5px 15px rgba(0,0,0,.6);
46
+ }
47
+
48
+ .close {
49
+ color: #aaa;
50
+ float: right;
51
+ font-size: 28px;
52
+ font-weight: bold;
53
+ cursor: pointer;
54
+ }
55
+
56
+ .close:hover, .close:focus {
57
+ color: #fff;
58
+ text-decoration: none;
59
+ cursor: pointer;
60
+ }
61
+
62
+ /* PyData Sphinx Light Theme */
63
+ html[data-theme="light"] #${container_id},
64
+ body[data-jp-theme-light="true"] #${container_id} {
65
+ background: #fff;
66
+ border: 1px solid #ccc;
67
+ }
68
+
69
+ html[data-theme="light"] .modal,
70
+ body[data-jp-theme-light="true"] .modal {
71
+ background-color: rgba(220,220,220,0.85);
72
+ }
73
+
74
+ html[data-theme="light"] .modal-content,
75
+ body[data-jp-theme-light="true"] .modal-content {
76
+ background: #fff;
77
+ color: #222;
78
+ border: 1px solid #888;
79
+ }
80
+
81
+ html[data-theme="light"] .close,
82
+ body[data-jp-theme-light="true"] .close {
83
+ color: #222;
84
+ }
85
+
86
+ html[data-theme="light"] .close:hover, html[data-theme="light"] .close:focus,
87
+ body[data-jp-theme-light="true"] .close:hover, body[data-jp-theme-light="true"] .close:focus {
88
+ color: #555;
89
+ }
90
+
91
+ /* Fallback: system light mode */
92
+ @media (prefers-color-scheme: light) {
16
93
  #${container_id} {
17
- width: 1200px; height: 800px; border: 1px solid #444;
18
- background: #181818;
94
+ background: #fff;
95
+ border: 1px solid #ccc;
19
96
  }
20
97
  .modal {
21
- display: none; position: fixed; z-index: 1000; left: 0; top: 0; width: 100%; height: 100%;
22
- overflow: auto; background-color: rgba(10,10,10,0.85);
98
+ background-color: rgba(220,220,220,0.85);
23
99
  }
24
100
  .modal-content {
25
- background-color: #23272e; color: #f0f0f0;
26
- margin: 10% auto; padding: 24px; border: 1px solid #888;
27
- width: 50%; border-radius: 8px;
28
- box-shadow: 0 5px 15px rgba(0,0,0,.6);
101
+ background: #fff;
102
+ color: #222;
103
+ border: 1px solid #888;
29
104
  }
30
105
  .close {
31
- color: #aaa; float: right; font-size: 28px; font-weight: bold; cursor: pointer;
106
+ color: #222;
32
107
  }
33
- .close:hover, .close:focus { color: #fff; text-decoration: none; cursor: pointer; }
34
- @media (prefers-color-scheme: light) {
35
- #${container_id} { background: #fff; border: 1px solid #ccc; }
36
- .modal { background-color: rgba(220,220,220,0.85); }
37
- .modal-content { background: #fff; color: #222; border: 1px solid #888; }
38
- .close { color: #222; }
39
- .close:hover, .close:focus { color: #555; }
108
+ .close:hover, .close:focus {
109
+ color: #555;
40
110
  }
111
+ }
41
112
  </style>
42
- <script type="text/javascript" src="https://unpkg.com/vis-network/standalone/umd/vis-network.min.js"></script>
113
+ <script type="text/javascript">
114
+ if (typeof vis === 'undefined') {
115
+ var script = document.createElement('script');
116
+ script.src = "https://unpkg.com/vis-network/standalone/umd/vis-network.min.js";
117
+ script.type = "text/javascript";
118
+ script.onload = function() {
119
+ console.log("vis-network loaded dynamically");
120
+ // You can trigger your graph init here if needed
121
+ };
122
+ document.head.appendChild(script);
123
+ } else {
124
+ console.log("vis-network already loaded");
125
+ }
126
+ </script>
43
127
  <script type="text/javascript">
44
128
  (function() {
45
129
  // Détection du thème
46
- function getTheme() {
47
- return window.matchMedia('(prefers-color-scheme: dark)').matches ? 'dark' : 'light';
130
+ function getTheme() {
131
+ // Try PyData Sphinx theme on <html>
132
+ const pydataTheme = document.documentElement.getAttribute("data-theme");
133
+ if (pydataTheme === "dark" || pydataTheme === "light") {
134
+ return pydataTheme;
48
135
  }
136
+
137
+ // Try JupyterLab theme on <body>
138
+ const jupyterLabTheme = document.body.getAttribute("data-jp-theme-name");
139
+ if (jupyterLabTheme) {
140
+ // Simplify theme name to 'dark' or 'light'
141
+ const lowerName = jupyterLabTheme.toLowerCase();
142
+ if (lowerName.includes("dark")) {
143
+ return "dark";
144
+ }
145
+ if (lowerName.includes("light")) {
146
+ return "light";
147
+ }
148
+ }
149
+
150
+ // Fallback to system preference
151
+ return window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches ? "dark" : "light";
152
+ };
49
153
  function getVisOptions(theme) {
50
154
  if (theme === 'dark') {
51
155
  return {
@@ -86,15 +190,41 @@ vis_template = Template("""
86
190
  interaction: { hover: true }
87
191
  };
88
192
  }
89
- }
193
+ };
90
194
  const nodes = new vis.DataSet(${nodes_json});
91
195
  const edges = new vis.DataSet(${edges_json});
92
196
  const container = document.getElementById('${container_id}');
93
197
  let network = null;
198
+
199
+ var physics = {
200
+ physics: {
201
+ solver: "forceAtlas2Based",
202
+ forceAtlas2Based: {
203
+ gravitationalConstant: -50,
204
+ centralGravity: 0.01,
205
+ springLength: 200,
206
+ springConstant: 0.08,
207
+ damping: 0.98,
208
+ avoidOverlap: 1
209
+ },
210
+ maxVelocity: 10,
211
+ minVelocity: 0.9,
212
+ stabilization: {
213
+ enabled: true,
214
+ iterations: 2000,
215
+ updateInterval: 50,
216
+ onlyDynamicEdges: false,
217
+ fit: true
218
+ },
219
+ timestep: 0.25
220
+ }
221
+ };
222
+
94
223
  function renderNetwork() {
95
224
  const theme = getTheme();
96
225
  const options = getVisOptions(theme);
97
226
  network = new vis.Network(container, { nodes: nodes, edges: edges }, options);
227
+ network.setOptions(physics)
98
228
  // Tooltip survol
99
229
  network.on("hoverNode", function(params) {
100
230
  const node = nodes.get(params.node);
@@ -131,12 +261,22 @@ vis_template = Template("""
131
261
  window.onclick = function(event) {
132
262
  if (event.target == modal) { modal.style.display = "none"; }
133
263
  };
134
- }
264
+ };
135
265
  renderNetwork();
266
+
136
267
  // Adapter dynamiquement si le thème change
137
- window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', function() {
138
- renderNetwork();
139
- });
268
+ window.addEventListener("theme-changed", () => renderNetwork());
269
+ const observer = new MutationObserver(mutations => {
270
+ for (const mutation of mutations) {
271
+ if (mutation.type === "attributes" && mutation.attributeName === "data-jp-theme-name") {
272
+ renderNetwork();
273
+ }
274
+ }
275
+ });
276
+ observer.observe(document.body, { attributes: true });
277
+ if (window.matchMedia) {
278
+ window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', () => renderNetwork());
279
+ };
140
280
  })();
141
281
  </script>
142
282
  """)
gismap/sources/dblp.py CHANGED
@@ -5,15 +5,15 @@ from bs4 import BeautifulSoup as Soup
5
5
  from time import sleep
6
6
 
7
7
  from gismap.sources.models import DB, Author, Publication
8
- from gismap.utils.text import clean_aliases
8
+ from gismap.utils.text import clean_aliases, auto_int
9
9
  from gismap.utils.requests import get
10
10
 
11
11
 
12
12
  @dataclass(repr=False)
13
13
  class DBLP(DB):
14
14
  db_name: ClassVar[str] = "dblp"
15
- author_backoff: ClassVar[float] = 7.0
16
- publi_backoff: ClassVar[float] = 2.0
15
+ author_backoff: ClassVar[float] = 5.0
16
+ publi_backoff: ClassVar[float] = 1.0
17
17
 
18
18
  @classmethod
19
19
  def search_author(cls, name, wait=True):
@@ -84,17 +84,16 @@ class DBLP(DB):
84
84
  authors=[DBLPAuthor(name='Yacine Boufkhad', key='75/5742'), DBLPAuthor(name='Fabien Mathieu', key='66/2077'),
85
85
  DBLPAuthor(name='Fabien de Montgolfier', key='57/6313'), DBLPAuthor(name='Diego Perino', key='03/3645'),
86
86
  DBLPAuthor(name='Laurent Viennot', key='v/LaurentViennot')],
87
- venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08',
88
- url='https://dblp.org/rec/conf/iptps/BoufkhadMMPV08.html', pages=4)
87
+ venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08')
89
88
  >>> publications[-1] # doctest: +NORMALIZE_WHITESPACE
90
89
  DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
91
90
  authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
92
- key='conf/sss/Mathieu07', url='https://dblp.org/rec/conf/sss/Mathieu07.html', pages='372-382')
91
+ key='conf/sss/Mathieu07')
93
92
  """
94
93
  r = get(f"https://dblp.org/pid/{a.key}.xml")
95
94
  soup = Soup(r, features="xml")
96
95
  if wait:
97
- sleep(cls.author_backoff)
96
+ sleep(cls.publi_backoff)
98
97
  res = [DBLPPublication.from_soup(r) for r in soup("r")]
99
98
  return [p for p in res if p.authors]
100
99
 
@@ -110,7 +109,7 @@ class DBLPAuthor(Author, DBLP):
110
109
  return f"https://dblp.org/pid/{self.key}.html"
111
110
  return f"https://dblp.org/search?q={quote_plus(self.name)}"
112
111
 
113
- def get_publications(self, wait=False):
112
def get_publications(self, wait=True):
    """
    Retrieve the publications of this author.

    Parameters
    ----------
    wait: :class:`bool`, default=True
        Forwarded unchanged to :meth:`DBLP.from_author`.

    Returns
    -------
    :class:`list`
        Whatever :meth:`DBLP.from_author` returns for this author.
    """
    publications = DBLP.from_author(self, wait=wait)
    return publications
115
114
 
116
115
 
@@ -128,29 +127,27 @@ DBLP_TYPES = {
128
127
  @dataclass(repr=False)
129
128
  class DBLPPublication(Publication, DBLP):
130
129
  key: str
131
- url: str = None
132
- pages: str = None
133
- volume: int = None
134
- number: int = None
130
+ metadata: dict = field(default_factory=dict)
131
+
132
@property
def url(self):
    """
    Address of the DBLP record page built from ``key``.

    Returns
    -------
    :class:`str` or None
        ``https://dblp.org/rec/<key>.html``, or ``None`` when ``key`` is empty.
    """
    return f"https://dblp.org/rec/{self.key}.html" if self.key else None
135
138
 
136
139
  @classmethod
137
140
  def from_soup(cls, soup):
138
141
  p = soup.find()
139
142
  typ = p.get("publtype", p.name)
140
143
  typ = DBLP_TYPES.get(typ, typ)
144
+
141
145
  res = {
142
146
  "type": typ,
143
147
  "key": p["key"],
144
- "url": f"https://dblp.org/rec/{p['key']}.html",
148
+ "title": p.title.text,
149
+ "year": int(p.year.text),
145
150
  }
146
- keys = ["title", "booktitle", "pages", "journal", "year", "volume", "number"]
147
- for tag in keys:
148
- t = p.find(tag)
149
- if t:
150
- try:
151
- res[tag] = int(t.text)
152
- except ValueError:
153
- res[tag] = t.text
154
151
  for tag in ["booktitle", "journal"]:
155
152
  t = p.find(tag)
156
153
  if t:
@@ -159,4 +156,11 @@ class DBLPPublication(Publication, DBLP):
159
156
  else:
160
157
  res["venue"] = "unpublished"
161
158
  res["authors"] = [DBLPAuthor(key=a["pid"], name=a.text) for a in p("author")]
162
- return cls(**{k: v for k, v in res.items() if k in cls.__match_args__})
159
+
160
+ metadata = dict()
161
+ for tag in p.find_all(recursive=False):
162
+ name = tag.name
163
+ if name not in {"title", "year", "author", "booktitle", "journal"}:
164
+ metadata[name] = auto_int(tag.text)
165
+
166
+ return cls(**res, metadata=metadata)
gismap/sources/hal.py CHANGED
@@ -2,6 +2,7 @@ from typing import ClassVar
2
2
  from dataclasses import dataclass, field
3
3
  from collections import defaultdict
4
4
  from urllib.parse import quote_plus
5
+ from bs4 import BeautifulSoup as Soup
5
6
  import json
6
7
 
7
8
  from gismap.sources.models import DB, Publication, Author # DBAuthor, DBPublication
@@ -67,7 +68,7 @@ class HAL(DB):
67
68
  elif "person_i" in a:
68
69
  pids[a["person_i"]].add(a.get("label_s"))
69
70
  elif "fullName_s" in a:
70
- names.add(a["fullName_s"])
71
+ names.add(a["fullName_s"])
71
72
  res = [
72
73
  HALAuthor(name=name, key=k, aliases=clean_aliases(name, v))
73
74
  for k, v in hids.items()
@@ -84,7 +85,7 @@ class HAL(DB):
84
85
  HALAuthor(
85
86
  name=name,
86
87
  key=fullname,
87
- aliases=clean_aliases(name, fullname),
88
+ aliases=[],
88
89
  key_type="fullname",
89
90
  )
90
91
  for fullname in names
@@ -117,7 +118,7 @@ class HAL(DB):
117
118
  HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname'),
118
119
  HALAuthor(name='Laurent Viennot', key='laurentviennot')],
119
120
  venue='Proceedings of the 7th Internnational Workshop on Peer-to-Peer Systems (IPTPS)', type='conference',
120
- year=2008, key='471724', url='https://inria.hal.science/inria-00471724v1')
121
+ year=2008, key='471724')
121
122
  >>> diego = publications[2].authors[3]
122
123
  >>> diego
123
124
  HALAuthor(name='Diego Perino', key='Diego Perino', key_type='fullname')
@@ -127,7 +128,7 @@ class HAL(DB):
127
128
  HALPublication(title='Upper bounds for stabilization in acyclic preference-based systems',
128
129
  authors=[HALAuthor(name='Fabien Mathieu', key='fabien-mathieu')],
129
130
  venue="SSS'07 - 9th international conference on Stabilization, Safety, and Security of Distributed Systems",
130
- type='conference', year=2007, key='668356', url='https://inria.hal.science/hal-00668356v1')
131
+ type='conference', year=2007, key='668356')
131
132
 
132
133
  Case of someone with multiple ids one want to cumulate:
133
134
 
@@ -180,9 +181,30 @@ class HALAuthor(Author, HAL):
180
181
  key: str | int = None
181
182
  key_type: str = None
182
183
  aliases: list = field(default_factory=list)
184
+ _url: str = None
185
+ _img: str = None
186
+ _cv: bool = None
187
+
188
def check_cv(self):
    """
    Look for a HAL CV page for this author and cache the outcome.

    ``_cv`` is set to ``False`` when the author has a ``key_type`` or when the
    fetched page contains a ``<form>``. Otherwise ``_cv`` is set to ``True``,
    the page address is stored in ``_url`` and, if the expected ``<img>`` tag
    is found, its ``src`` is stored in ``_img``.

    Returns
    -------
    None
    """
    if self.key_type is not None:
        # Only keys without an explicit key_type are looked up on cv.hal.science.
        self._cv = False
        return None
    url = f"https://cv.hal.science/{self.key}"
    soup = Soup(get(url), "lxml")
    if soup.form:
        # NOTE(review): a <form> presumably means there is no public CV page -- confirm.
        self._cv = False
        return None
    self._cv = True
    self._url = url
    try:
        self._img = soup.main.section.div.div.div.img["src"]
    except (AttributeError, TypeError, KeyError):
        # The picture is optional and the page layout may differ:
        # - a missing intermediate tag yields None, whose attribute access raises AttributeError;
        # - a missing <img> makes the subscript fail with TypeError;
        # - an <img> without "src" raises KeyError.
        # In all cases _img is left untouched.
        return None
183
203
 
184
204
  @property
185
205
  def url(self):
206
+ if self._url is not None:
207
+ return self._url
186
208
  if self.key_type == "pid":
187
209
  return f"https://hal.science/search/index/?q=*&authIdPerson_i={self.key}"
188
210
  elif self.key_type == "fullname":
@@ -190,6 +212,12 @@ class HALAuthor(Author, HAL):
190
212
  else:
191
213
  return f"https://hal.science/search/index/?q=*&authIdHal_s={self.key}"
192
214
 
215
@property
def img(self):
    """
    Picture address cached in ``_img``.

    ``check_cv`` is called first when ``_cv`` is still ``None``.

    Returns
    -------
    :class:`str` or None
        Current value of ``_img``.
    """
    cv_unchecked = self._cv is None
    if cv_unchecked:
        self.check_cv()
    return self._img
220
+
193
221
def get_publications(self):
    """
    Retrieve the publications of this author.

    Returns
    -------
    :class:`list`
        Whatever :meth:`HAL.from_author` returns for this author.
    """
    publications = HAL.from_author(self)
    return publications
195
223
 
@@ -241,8 +269,11 @@ HAL_KEYS = {
241
269
  @dataclass(repr=False)
242
270
  class HALPublication(Publication, HAL):
243
271
  key: str
244
- abstract: str = None
245
- url: str = None
272
+ metadata: dict = field(default_factory=dict)
273
+
274
@property
def url(self):
    """
    Address of the publication, read from ``metadata``.

    Returns
    -------
    :class:`str` or None
        Value stored under ``"url"`` in ``metadata``, ``None`` if absent.
    """
    details = self.metadata
    return details.get("url")
246
277
 
247
278
  @classmethod
248
279
  def from_json(cls, r):
@@ -258,15 +289,20 @@ class HALPublication(Publication, HAL):
258
289
  :class:`~gismap.sources.hal.HALPublication`
259
290
 
260
291
  """
261
- res = {v: unlist(r[k]) for k, v in HAL_KEYS.items() if k in r}
292
+ keys = {v: unlist(r[k]) for k, v in HAL_KEYS.items() if k in r}
293
+ res = {k: keys[k] for k in ["key", "title", "year"]}
294
+ # res = {v: unlist(r[k]) for k, v in HAL_KEYS.items() if k in r}
262
295
  res["authors"] = [
263
296
  parse_facet_author(a) for a in r.get("authFullNamePersonIDIDHal_fs", [])
264
297
  ]
265
298
  for tag in ["booktitle", "journal", "conference"]:
266
- if tag in res:
267
- res["venue"] = res[tag]
299
+ if tag in keys:
300
+ res["venue"] = keys[tag]
268
301
  break
269
302
  else:
270
303
  res["venue"] = "unpublished"
271
- res["type"] = HAL_TYPES.get(res["type"], res["type"].lower())
272
- return cls(**{k: v for k, v in res.items() if k in cls.__match_args__})
304
+ res["type"] = HAL_TYPES.get(keys["type"], keys["type"].lower())
305
+ res["metadata"] = {
306
+ k: keys[k] for k in {"abstract", "url"} if k in keys and keys[k]
307
+ }
308
+ return cls(**res)
gismap/sources/multi.py CHANGED
@@ -6,11 +6,22 @@ from gismap.sources.models import Publication, Author
6
6
  from gismap.utils.text import clean_aliases
7
7
 
8
8
 
9
- score_rosetta = {
10
- "db_name": {"dblp": 1, "hal": 2},
11
- "venue": {"CoRR": -1, "unpublished": -2},
12
- "type": {"conference": 1, "journal": 2},
13
- }
9
def score_author_source(dbauthor):
    """
    Rank a database author entry; higher scores are preferred.

    Parameters
    ----------
    dbauthor: :class:`object`
        Entry exposing ``db_name`` (and ``key_type`` when ``db_name`` is ``"hal"``).

    Returns
    -------
    :class:`int`
        For ``"hal"``: -1 for a ``"fullname"`` key, 2 for a ``"pid"`` key, 3 otherwise.
        1 for ``"dblp"``, 0 for any other database.
    """
    origin = dbauthor.db_name
    if origin == "hal":
        kind = dbauthor.key_type
        if kind == "fullname":
            return -1
        return 2 if kind == "pid" else 3
    return 1 if origin == "dblp" else 0
21
+
22
+
23
def sort_author_sources(sources):
    """
    Order author entries from highest to lowest :func:`score_author_source`.

    Parameters
    ----------
    sources: :class:`list`
        Author entries to rank.

    Returns
    -------
    :class:`list`
        New list; the input is left untouched.
    """
    ranked = list(sources)
    ranked.sort(key=score_author_source, reverse=True)
    return ranked
14
25
 
15
26
 
16
27
  @dataclass(repr=False)
@@ -35,10 +46,15 @@ class SourcedAuthor(Author):
35
46
 
36
47
@classmethod
def from_sources(cls, sources):
    """
    Build an author from several database entries.

    Parameters
    ----------
    sources: :class:`list`
        Database entries; they are ranked with :func:`sort_author_sources`.

    Returns
    -------
    Instance of ``cls`` named after the top-ranked entry and holding the ranked entries.
    """
    ranked = sort_author_sources(sources)
    best = ranked[0]
    return cls(name=best.name, sources=ranked)
39
51
 
40
- def get_publications(self, clean=True):
41
- res = {p.key: p for a in self.sources for p in a.get_publications()}
52
+ def get_publications(self, clean=True, selector=None):
53
+ if selector is None:
54
+ selector = []
55
+ res = {
56
+ p.key: p for a in self.sources for p in a.get_publications() if all(f(p) for f in selector)
57
+ }
42
58
  if clean:
43
59
  regroup_authors({self.key: self}, res)
44
60
  return regroup_publications(res)
@@ -46,30 +62,49 @@ class SourcedAuthor(Author):
46
62
  return res
47
63
 
48
64
 
65
# Score granted per attribute value; values not listed count as 0.
publication_score_rosetta = {
    "db_name": {"dblp": 1, "hal": 2},
    "venue": {"CoRR": -1, "unpublished": -2},
    "type": {"conference": 1, "journal": 2},
}


def score_publication_source(source):
    """
    Compute the ranking tuple of a publication entry.

    Parameters
    ----------
    source: :class:`object`
        Entry exposing ``year``; attributes named in ``publication_score_rosetta`` are optional.

    Returns
    -------
    :class:`tuple`
        One score per key of ``publication_score_rosetta`` (in its order), followed by ``year``.
    """
    ranking = []
    for attribute, weights in publication_score_rosetta.items():
        value = getattr(source, attribute, None)
        ranking.append(weights.get(value, 0))
    return (*ranking, source.year)
78
+
79
+
80
def sort_publication_sources(sources):
    """
    Order publication entries from highest to lowest :func:`score_publication_source`.

    Parameters
    ----------
    sources: :class:`list`
        Publication entries to rank.

    Returns
    -------
    :class:`list`
        New list; the input is left untouched.
    """
    ranked = list(sources)
    ranked.sort(key=score_publication_source, reverse=True)
    return ranked
82
+
83
+
49
84
@dataclass(repr=False)
class SourcedPublication(Publication):
    # Database entries describing this publication.
    sources: list = field(default_factory=list)

    @property
    def key(self):
        """Key of the first entry of ``sources``, or ``None`` when there is none."""
        return self.sources[0].key if self.sources else None

    @classmethod
    def from_sources(cls, sources):
        """
        Build a publication from several database entries.

        Parameters
        ----------
        sources: :class:`list`
            Entries of the same publication; they are ranked with
            :func:`sort_publication_sources`.

        Returns
        -------
        Instance of ``cls`` whose title, authors, venue, type and year are copied
        from the top-ranked entry, and whose ``sources`` are the ranked entries.
        """
        ranked = sort_publication_sources(sources)
        reference = ranked[0]
        copied = ["title", "authors", "venue", "type", "year"]
        core = {name: getattr(reference, name) for name in copied}
        return cls(**core, sources=ranked)
66
107
 
67
- @staticmethod
68
- def score_source(source):
69
- scores = [v.get(getattr(source, k, None), 0) for k, v in score_rosetta.items()]
70
- scores.append(source.year)
71
- return tuple(scores)
72
-
73
108
 
74
109
  def regroup_authors(auth_dict, pub_dict):
75
110
  """
@@ -100,7 +135,7 @@ def regroup_authors(auth_dict, pub_dict):
100
135
  pub.authors = [redirection.get(a.key, a) for a in pub.authors]
101
136
 
102
137
 
103
- def regroup_publications(pub_dict, threshold=90, length_impact=0.2):
138
+ def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
104
139
  """
105
140
  Puts together copies of the same publication.
106
141
 
@@ -119,17 +154,19 @@ def regroup_publications(pub_dict, threshold=90, length_impact=0.2):
119
154
  Unified publications.
120
155
  """
121
156
  pub_list = [p for p in pub_dict.values()]
157
+ res = dict()
122
158
 
123
- p = Process(length_impact=length_impact)
124
- p.fit([paper.title for paper in pub_list])
159
+ if pub_list:
125
160
 
126
- res = dict()
127
- done = np.zeros(len(pub_list), dtype=bool)
128
- for i, paper in enumerate(pub_list):
129
- if done[i]:
130
- continue
131
- locs = np.where(p.transform([paper.title])[0, :] > threshold)[0]
132
- pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
133
- res[pub.key] = pub
134
- done[locs] = True
161
+ p = Process(length_impact=length_impact, n_range=n_range)
162
+ p.fit([paper.title for paper in pub_list])
163
+
164
+ done = np.zeros(len(pub_list), dtype=bool)
165
+ for i, paper in enumerate(pub_list):
166
+ if done[i]:
167
+ continue
168
+ locs = np.where(p.transform([paper.title])[0, :] > threshold)[0]
169
+ pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
170
+ res[pub.key] = pub
171
+ done[locs] = True
135
172
  return res
gismap/utils/common.py CHANGED
@@ -1,4 +1,4 @@
1
- HIDDEN_KEYS = {"sources", "aliases", "abstract"}
1
+ HIDDEN_KEYS = {"sources", "aliases", "abstract", "metadata"}
2
2
 
3
3
 
4
4
  class LazyRepr:
@@ -10,7 +10,7 @@ class LazyRepr:
10
10
  kws = [
11
11
  f"{key}={value!r}"
12
12
  for key, value in self.__dict__.items()
13
- if value and key not in HIDDEN_KEYS
13
+ if value and key not in HIDDEN_KEYS and not key.startswith("_")
14
14
  ]
15
15
  return f"{type(self).__name__}({', '.join(kws)})"
16
16
 
@@ -58,3 +58,48 @@ def get_classes(root, key="name"):
58
58
  for c in root.__subclasses__():
59
59
  result.update(get_classes(c))
60
60
  return result
61
+
62
+
63
def list_of_objects(clss, dico, default=None):
    """
    Turn a flexible specification of objects into a flat list.

    Parameters
    ----------
    clss: :class:`object`
        ``None``, a key of `dico` given as a string, an object, or a (possibly nested)
        list mixing those.
    dico: :class:`dict`
        Mapping used to resolve string references; an unknown string raises ``KeyError``.
    default: :class:`list`, optional
        Returned as is wherever ``None`` is met. Defaults to an empty list.

    Returns
    -------
    :class:`list`
        Flat list of resolved objects.

    Examples
    --------

    >>> from gismap.sources.models import DB
    >>> subclasses = get_classes(DB, key='db_name')
    >>> from gismap import HAL, DBLP
    >>> list_of_objects([HAL, 'dblp'], subclasses)
    [<class 'gismap.sources.hal.HAL'>, <class 'gismap.sources.dblp.DBLP'>]
    >>> list_of_objects(None, subclasses, [DBLP])
    [<class 'gismap.sources.dblp.DBLP'>]
    >>> list_of_objects(DBLP, subclasses)
    [<class 'gismap.sources.dblp.DBLP'>]
    >>> list_of_objects('hal', subclasses)
    [<class 'gismap.sources.hal.HAL'>]
    """
    fallback = [] if default is None else default
    if clss is None:
        return fallback
    if isinstance(clss, str):
        return [dico[clss]]
    if not isinstance(clss, list):
        # Anything that is neither None, a string nor a list is taken as the object itself.
        return [clss]
    flattened = []
    for entry in clss:
        flattened.extend(list_of_objects(entry, dico, fallback))
    return flattened