gismap 0.2.1__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gismap/__init__.py +1 -0
- gismap/gisgraphs/__init__.py +0 -0
- gismap/gisgraphs/builder.py +105 -0
- gismap/{lab → gisgraphs}/graph.py +79 -69
- gismap/gisgraphs/groups.py +70 -0
- gismap/gisgraphs/js.py +190 -0
- gismap/gisgraphs/options.py +37 -0
- gismap/gisgraphs/style.py +119 -0
- gismap/gisgraphs/widget.py +145 -0
- gismap/gismo.py +2 -2
- gismap/lab/__init__.py +3 -7
- gismap/lab/egomap.py +17 -18
- gismap/lab/expansion.py +10 -9
- gismap/lab/filters.py +84 -9
- gismap/lab/lab_author.py +51 -4
- gismap/lab/{lab.py → labmap.py} +33 -12
- gismap/lab_examples/__init__.py +0 -0
- gismap/lab_examples/cedric.py +46 -0
- gismap/lab_examples/lincs.py +44 -0
- gismap/{lab → lab_examples}/lip6.py +8 -4
- gismap/{lab → lab_examples}/toulouse.py +23 -6
- gismap/sources/dblp.py +15 -17
- gismap/sources/hal.py +17 -8
- gismap/sources/models.py +7 -0
- gismap/sources/multi.py +27 -17
- gismap/utils/requests.py +4 -2
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/METADATA +21 -5
- gismap-0.3.0.dist-info/RECORD +38 -0
- gismap/lab/vis.py +0 -329
- gismap-0.2.1.dist-info/RECORD +0 -29
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/WHEEL +0 -0
- {gismap-0.2.1.dist-info → gismap-0.3.0.dist-info}/licenses/AUTHORS.md +0 -0
gismap/lab/{lab.py → labmap.py}
RENAMED
|
@@ -17,14 +17,21 @@ from gismap.sources.multi import (
|
|
|
17
17
|
regroup_publications,
|
|
18
18
|
)
|
|
19
19
|
from gismap.lab.expansion import proper_prospects
|
|
20
|
-
from gismap.lab.filters import
|
|
21
|
-
|
|
20
|
+
from gismap.lab.filters import (
|
|
21
|
+
author_taboo_filter,
|
|
22
|
+
publication_taboo_filter,
|
|
23
|
+
publication_size_filter,
|
|
24
|
+
publication_oneword_filter,
|
|
25
|
+
)
|
|
26
|
+
from gismap.gisgraphs.builder import make_vis
|
|
22
27
|
|
|
23
28
|
|
|
24
|
-
class
|
|
29
|
+
class LabMap(MixInIO):
|
|
25
30
|
"""
|
|
26
31
|
Abstract class for labs.
|
|
27
32
|
|
|
33
|
+
Actual Lab classes can be created by implementing the `_author_iterator` method.
|
|
34
|
+
|
|
28
35
|
Labs can be saved with the `dump` method and loaded with the `load` method.
|
|
29
36
|
|
|
30
37
|
Parameters
|
|
@@ -33,23 +40,31 @@ class Lab(MixInIO):
|
|
|
33
40
|
Name of the lab. Can be set as class or instance attribute.
|
|
34
41
|
dbs: :class:`list`, default=[:class:`~gismap.sources.hal.HAL`, :class:`~gismap.sources.dblp.DBLP`]
|
|
35
42
|
List of DB sources to use.
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
Attributes
|
|
46
|
+
-----------
|
|
47
|
+
|
|
36
48
|
author_selectors: :class:`list`
|
|
37
|
-
Author
|
|
49
|
+
Author filters. Default: minimal filtering.
|
|
38
50
|
publication_selectors: :class:`list`
|
|
39
|
-
Publication filter. Default: less than 10 authors
|
|
51
|
+
Publication filter. Default: less than 10 authors, not an editorial, at least two words in the title.
|
|
40
52
|
"""
|
|
41
53
|
|
|
42
54
|
name = None
|
|
43
55
|
dbs = default_dbs
|
|
44
56
|
|
|
45
|
-
def __init__(
|
|
46
|
-
self, name=None, dbs=None):
|
|
57
|
+
def __init__(self, name=None, dbs=None):
|
|
47
58
|
if name is not None:
|
|
48
59
|
self.name = name
|
|
49
60
|
if dbs is not None:
|
|
50
61
|
self.dbs = list_of_objects(dbs, db_dict, default=default_dbs)
|
|
51
62
|
self.author_selectors = [author_taboo_filter()]
|
|
52
|
-
self.publication_selectors = [
|
|
63
|
+
self.publication_selectors = [
|
|
64
|
+
publication_size_filter(),
|
|
65
|
+
publication_taboo_filter(),
|
|
66
|
+
publication_oneword_filter(),
|
|
67
|
+
]
|
|
53
68
|
self.authors = None
|
|
54
69
|
self.publications = None
|
|
55
70
|
|
|
@@ -82,6 +97,8 @@ class Lab(MixInIO):
|
|
|
82
97
|
self.authors[author.key] = author
|
|
83
98
|
if author.metadata.img is None:
|
|
84
99
|
author.auto_img()
|
|
100
|
+
if author.metadata.group is None:
|
|
101
|
+
author.metadata.group = self.name
|
|
85
102
|
|
|
86
103
|
def update_publis(self, desc="Publications information"):
|
|
87
104
|
"""
|
|
@@ -94,7 +111,9 @@ class Lab(MixInIO):
|
|
|
94
111
|
pubs = dict()
|
|
95
112
|
for author in tqdm(self.authors.values(), desc=desc):
|
|
96
113
|
pubs.update(
|
|
97
|
-
author.get_publications(
|
|
114
|
+
author.get_publications(
|
|
115
|
+
clean=False, selector=self.publication_selectors
|
|
116
|
+
)
|
|
98
117
|
)
|
|
99
118
|
regroup_authors(self.authors, pubs)
|
|
100
119
|
self.publications = regroup_publications(pubs)
|
|
@@ -118,7 +137,9 @@ class Lab(MixInIO):
|
|
|
118
137
|
author.auto_img()
|
|
119
138
|
author.metadata.group = group
|
|
120
139
|
pubs.update(
|
|
121
|
-
author.get_publications(
|
|
140
|
+
author.get_publications(
|
|
141
|
+
clean=False, selector=self.publication_selectors
|
|
142
|
+
)
|
|
122
143
|
)
|
|
123
144
|
|
|
124
145
|
for pub in self.publications.values():
|
|
@@ -132,7 +153,7 @@ class Lab(MixInIO):
|
|
|
132
153
|
return None
|
|
133
154
|
|
|
134
155
|
def html(self, **kwargs):
|
|
135
|
-
return
|
|
156
|
+
return make_vis(self, **kwargs)
|
|
136
157
|
|
|
137
158
|
def save_html(self, name=None, **kwargs):
|
|
138
159
|
if name is None:
|
|
@@ -145,7 +166,7 @@ class Lab(MixInIO):
|
|
|
145
166
|
display(HTML(self.html(**kwargs)))
|
|
146
167
|
|
|
147
168
|
|
|
148
|
-
class
|
|
169
|
+
class ListMap(LabMap):
|
|
149
170
|
"""
|
|
150
171
|
Simplest way to create a lab: with a list of names.
|
|
151
172
|
|
|
File without changes
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from bs4 import BeautifulSoup as Soup
|
|
3
|
+
from gismap.lab.labmap import LabMap
|
|
4
|
+
from gismap.lab.lab_author import AuthorMetadata, LabAuthor
|
|
5
|
+
from gismap.utils.requests import get
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CedricMap(LabMap):
|
|
9
|
+
"""
|
|
10
|
+
Class for handling a CNAM Cedric team from its name.
|
|
11
|
+
Default to `roc` team.
|
|
12
|
+
"""
|
|
13
|
+
name = "roc"
|
|
14
|
+
base_url = "https://cedric.cnam.fr"
|
|
15
|
+
|
|
16
|
+
def _author_iterator(self):
|
|
17
|
+
soup = Soup(get(f"{self.base_url}/equipes/{self.name}/"), features="lxml")
|
|
18
|
+
searchers = [li.a for ul in soup.find('div', {'id': 'annuaire'})('ul')[:3] for li in ul('li')]
|
|
19
|
+
done = set()
|
|
20
|
+
for searcher in searchers:
|
|
21
|
+
name = searcher.text.split('(')[0].strip()
|
|
22
|
+
if name in done:
|
|
23
|
+
continue
|
|
24
|
+
url = f"{self.base_url}{searcher['href']}"
|
|
25
|
+
sousoup = Soup(get(url), features="lxml")
|
|
26
|
+
img = sousoup.find('img', {'class': 'photo'})['src']
|
|
27
|
+
response = requests.head(img, allow_redirects=True)
|
|
28
|
+
if int(response.headers.get("Content-Length")) < 3000:
|
|
29
|
+
img = None
|
|
30
|
+
done.add(name)
|
|
31
|
+
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class CedricFull(LabMap):
|
|
35
|
+
"""
|
|
36
|
+
Class for handling all CNAM Cedric teams using `https://cedric.cnam.fr/equipes` to get team names.
|
|
37
|
+
"""
|
|
38
|
+
name = "Cedric"
|
|
39
|
+
|
|
40
|
+
def _author_iterator(self):
|
|
41
|
+
base = "https://cedric.cnam.fr/equipes/"
|
|
42
|
+
soup = Soup(get(base), features="lxml")
|
|
43
|
+
teams = {a['href'].split('/')[-2] for a in soup('a') if base in a.get('href', "") and len(a['href'])>len(base)}
|
|
44
|
+
for team in teams:
|
|
45
|
+
for author in CedricMap(name=team)._author_iterator():
|
|
46
|
+
yield author
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from bs4 import BeautifulSoup as Soup
|
|
2
|
+
from gismap.utils.requests import get
|
|
3
|
+
from gismap.lab.filters import re_filter
|
|
4
|
+
from gismap.lab import LabAuthor, Map
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
ghosts = [
|
|
8
|
+
"Altman",
|
|
9
|
+
"Lelarge",
|
|
10
|
+
"Teixera",
|
|
11
|
+
"Friedman",
|
|
12
|
+
"Fdida",
|
|
13
|
+
"Blaszczyszyn",
|
|
14
|
+
"Jacquet",
|
|
15
|
+
"Panafieu",
|
|
16
|
+
"Bušić",
|
|
17
|
+
]
|
|
18
|
+
no_ghost = re_filter(ghosts)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LINCS(Map):
|
|
22
|
+
name = "LINCS"
|
|
23
|
+
|
|
24
|
+
def _author_iterator(self):
|
|
25
|
+
soup = Soup(get("https://www.lincs.fr/people/"), features="lxml")
|
|
26
|
+
for entry in soup.main("div", class_="trombinoscope-row"):
|
|
27
|
+
cols = entry("div")
|
|
28
|
+
name = cols[1].text
|
|
29
|
+
if not no_ghost(name):
|
|
30
|
+
continue
|
|
31
|
+
img = cols[0].img["src"]
|
|
32
|
+
url = cols[-1].a
|
|
33
|
+
if url:
|
|
34
|
+
url = url.get("href")
|
|
35
|
+
group = cols[2]("a")
|
|
36
|
+
if group:
|
|
37
|
+
group = group[-1].text
|
|
38
|
+
else:
|
|
39
|
+
group = "External"
|
|
40
|
+
author = LabAuthor(name)
|
|
41
|
+
author.metadata.img = img
|
|
42
|
+
author.metadata.group = group
|
|
43
|
+
author.metadata.url = url
|
|
44
|
+
yield author
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
from bs4 import BeautifulSoup as Soup
|
|
2
2
|
import re
|
|
3
3
|
|
|
4
|
-
from gismap.lab.
|
|
4
|
+
from gismap.lab.labmap import LabMap
|
|
5
5
|
from gismap.lab.lab_author import AuthorMetadata, LabAuthor
|
|
6
6
|
from gismap.utils.requests import get
|
|
7
7
|
|
|
8
8
|
|
|
9
|
-
class
|
|
9
|
+
class Lip6Map(LabMap):
|
|
10
10
|
"""
|
|
11
11
|
Class for handling a LIP6 team using `https://www.lip6.fr/recherche/team_membres.php?acronyme=*team_acronym*` as entry point.
|
|
12
12
|
Default to `NPA` team.
|
|
@@ -25,10 +25,14 @@ class Lip6Lab(Lab):
|
|
|
25
25
|
previous = a.find_previous_sibling()
|
|
26
26
|
if previous is not None and "user" in previous.get("class", []):
|
|
27
27
|
metadata.url = previous["href"].strip()
|
|
28
|
+
fiche = "https://www.lip6.fr/" + a["href"].split("/", 1)[1]
|
|
29
|
+
img = Soup(get(fiche), "lxml").img
|
|
30
|
+
if img and "reflet" in img["class"] and "noPhoto" not in img["src"]:
|
|
31
|
+
metadata.img = "https://www.lip6.fr/" + img["src"].split("/", 1)[1]
|
|
28
32
|
yield LabAuthor(name=name, metadata=metadata)
|
|
29
33
|
|
|
30
34
|
|
|
31
|
-
class
|
|
35
|
+
class Lip6Full(Lip6Map):
|
|
32
36
|
"""
|
|
33
37
|
Class for handling all LIP6 teams using `https://www.lip6.fr/informations/annuaire.php` to get team names.
|
|
34
38
|
"""
|
|
@@ -40,5 +44,5 @@ class Lip6(Lip6Lab):
|
|
|
40
44
|
for group in groups.findall(
|
|
41
45
|
get("https://www.lip6.fr/informations/annuaire.php")
|
|
42
46
|
):
|
|
43
|
-
for author in
|
|
47
|
+
for author in Lip6Map(name=group)._author_iterator():
|
|
44
48
|
yield author
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from bs4 import BeautifulSoup as Soup
|
|
3
|
-
from gismap.lab.
|
|
3
|
+
from gismap.lab.labmap import LabMap
|
|
4
4
|
from gismap.lab.lab_author import AuthorMetadata, LabAuthor
|
|
5
5
|
from gismap.utils.requests import get
|
|
6
6
|
|
|
@@ -9,7 +9,7 @@ def name_changer(name, rosetta):
|
|
|
9
9
|
return rosetta.get(name, name)
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
class
|
|
12
|
+
class LaasMap(LabMap):
|
|
13
13
|
"""
|
|
14
14
|
Class for handling a LAAS team from its name.
|
|
15
15
|
Default to `sara` team.
|
|
@@ -29,10 +29,24 @@ class LaasLab(Lab):
|
|
|
29
29
|
if "public_avatar" in a.img["class"]
|
|
30
30
|
else None
|
|
31
31
|
)
|
|
32
|
-
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img))
|
|
32
|
+
yield LabAuthor(name=name, metadata=AuthorMetadata(url=url, img=img, group=self.name.upper()))
|
|
33
33
|
|
|
34
34
|
|
|
35
|
-
class
|
|
35
|
+
class LaasFull(LabMap):
|
|
36
|
+
"""
|
|
37
|
+
Class for handling all LAAS teams using `https://www.laas.fr/fr/equipes/` to get team names.
|
|
38
|
+
"""
|
|
39
|
+
name = "LAAS"
|
|
40
|
+
|
|
41
|
+
def _author_iterator(self):
|
|
42
|
+
soup = Soup(get("https://www.laas.fr/fr/equipes/"), features="lxml")
|
|
43
|
+
teams = [a['href'].split('/')[-2] for a in soup('a', {'class': "badge"})]
|
|
44
|
+
for team in teams:
|
|
45
|
+
for author in LaasMap(name=team)._author_iterator():
|
|
46
|
+
yield author
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class SolaceMap(LabMap):
|
|
36
50
|
"""
|
|
37
51
|
Class for handling the Solace team (`https://solace.cnrs.fr`).
|
|
38
52
|
"""
|
|
@@ -41,8 +55,11 @@ class Solace(Lab):
|
|
|
41
55
|
regex = re.compile(r"<li>(.*?)(,| \(|</li>)")
|
|
42
56
|
|
|
43
57
|
def _author_iterator(self):
|
|
44
|
-
html = get("https://solace.cnrs.fr/people.html")
|
|
58
|
+
html = get("https://solace.cnrs.fr/people.html", verify=False)
|
|
45
59
|
for name, _ in self.regex.findall(html):
|
|
46
60
|
soup = Soup(name, features="lxml")
|
|
47
61
|
url = soup.a["href"] if soup.a else None
|
|
48
|
-
yield LabAuthor(
|
|
62
|
+
yield LabAuthor(
|
|
63
|
+
name=soup.text.strip(),
|
|
64
|
+
metadata=AuthorMetadata(url=url, group=self.name.upper()),
|
|
65
|
+
)
|
gismap/sources/dblp.py
CHANGED
|
@@ -72,23 +72,6 @@ class DBLP(DB):
|
|
|
72
72
|
Papers available in DBLP.
|
|
73
73
|
wait: :class:`bool`
|
|
74
74
|
Wait a bit to avoid 429.
|
|
75
|
-
|
|
76
|
-
Examples
|
|
77
|
-
--------
|
|
78
|
-
|
|
79
|
-
>>> fabien = DBLPAuthor('Fabien Mathieu', key='66/2077')
|
|
80
|
-
>>> publications = sorted(DBLP.from_author(fabien),
|
|
81
|
-
... key=lambda p: p.title)
|
|
82
|
-
>>> publications[0] # doctest: +NORMALIZE_WHITESPACE
|
|
83
|
-
DBLPPublication(title='Achievable catalog size in peer-to-peer video-on-demand systems.',
|
|
84
|
-
authors=[DBLPAuthor(name='Yacine Boufkhad', key='75/5742'), DBLPAuthor(name='Fabien Mathieu', key='66/2077'),
|
|
85
|
-
DBLPAuthor(name='Fabien de Montgolfier', key='57/6313'), DBLPAuthor(name='Diego Perino', key='03/3645'),
|
|
86
|
-
DBLPAuthor(name='Laurent Viennot', key='v/LaurentViennot')],
|
|
87
|
-
venue='IPTPS', type='conference', year=2008, key='conf/iptps/BoufkhadMMPV08')
|
|
88
|
-
>>> publications[-1] # doctest: +NORMALIZE_WHITESPACE
|
|
89
|
-
DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
|
|
90
|
-
authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
|
|
91
|
-
key='conf/sss/Mathieu07')
|
|
92
75
|
"""
|
|
93
76
|
r = get(f"https://dblp.org/pid/{a.key}.xml")
|
|
94
77
|
soup = Soup(r, features="xml")
|
|
@@ -100,6 +83,21 @@ class DBLP(DB):
|
|
|
100
83
|
|
|
101
84
|
@dataclass(repr=False)
|
|
102
85
|
class DBLPAuthor(Author, DBLP):
|
|
86
|
+
"""
|
|
87
|
+
Examples
|
|
88
|
+
--------
|
|
89
|
+
|
|
90
|
+
>>> fabien = DBLPAuthor('Fabien Mathieu', key='66/2077')
|
|
91
|
+
>>> publications = sorted(fabien.get_publications(),
|
|
92
|
+
... key=lambda p: p.title)
|
|
93
|
+
>>> publications[0].url # doctest: +NORMALIZE_WHITESPACE
|
|
94
|
+
'https://dblp.org/rec/conf/iptps/BoufkhadMMPV08.html'
|
|
95
|
+
>>> publications[-1] # doctest: +NORMALIZE_WHITESPACE
|
|
96
|
+
DBLPPublication(title='Upper Bounds for Stabilization in Acyclic Preference-Based Systems.',
|
|
97
|
+
authors=[DBLPAuthor(name='Fabien Mathieu', key='66/2077')], venue='SSS', type='conference', year=2007,
|
|
98
|
+
key='conf/sss/Mathieu07')
|
|
99
|
+
|
|
100
|
+
"""
|
|
103
101
|
key: str
|
|
104
102
|
aliases: list = field(default_factory=list)
|
|
105
103
|
|
gismap/sources/hal.py
CHANGED
|
@@ -44,8 +44,7 @@ class HAL(DB):
|
|
|
44
44
|
>>> HAL.search_author("Ana Busic")
|
|
45
45
|
[HALAuthor(name='Ana Busic', key='anabusic')]
|
|
46
46
|
>>> HAL.search_author("Potop-Butucaru Maria") # doctest: +NORMALIZE_WHITESPACE
|
|
47
|
-
[HALAuthor(name='Potop-Butucaru Maria', key='
|
|
48
|
-
HALAuthor(name='Potop-Butucaru Maria', key='841868', key_type='pid')]
|
|
47
|
+
[HALAuthor(name='Potop-Butucaru Maria', key='841868', key_type='pid')]
|
|
49
48
|
>>> diego = HAL.search_author("Diego Perino")
|
|
50
49
|
>>> diego # doctest: +NORMALIZE_WHITESPACE
|
|
51
50
|
[HALAuthor(name='Diego Perino', key='847558', key_type='pid'),
|
|
@@ -134,12 +133,12 @@ class HAL(DB):
|
|
|
134
133
|
|
|
135
134
|
>>> maria = HAL.search_author('Maria Potop-Butucaru')
|
|
136
135
|
>>> maria # doctest: +NORMALIZE_WHITESPACE
|
|
137
|
-
[HALAuthor(name='Maria Potop-Butucaru', key='
|
|
138
|
-
|
|
139
|
-
>>>
|
|
140
|
-
|
|
141
|
-
>>> len(maria[
|
|
142
|
-
|
|
136
|
+
[HALAuthor(name='Maria Potop-Butucaru', key='841868', key_type='pid')]
|
|
137
|
+
>>> n_pubs = len(HAL.from_author(maria[0]))
|
|
138
|
+
>>> n_pubs > 200
|
|
139
|
+
True
|
|
140
|
+
>>> n_pubs == len(maria[0].get_publications())
|
|
141
|
+
True
|
|
143
142
|
|
|
144
143
|
Note: an error is raised if not enough data is provided
|
|
145
144
|
|
|
@@ -173,6 +172,9 @@ class HAL(DB):
|
|
|
173
172
|
r = get(api, params=params)
|
|
174
173
|
response = json.loads(r)["response"]
|
|
175
174
|
res = [HALPublication.from_json(r) for r in response.get("docs", [])]
|
|
175
|
+
if len(res) == 0 and a.key_type != "fullname":
|
|
176
|
+
name = a.name
|
|
177
|
+
return HAL.from_author(HALAuthor(name=name, key=name, key_type="fullname"))
|
|
176
178
|
return res
|
|
177
179
|
|
|
178
180
|
|
|
@@ -185,6 +187,13 @@ class HALAuthor(Author, HAL):
|
|
|
185
187
|
_img: str = None
|
|
186
188
|
_cv: bool = None
|
|
187
189
|
|
|
190
|
+
def __post_init__(self):
|
|
191
|
+
if self.key and self.key_type is None:
|
|
192
|
+
if self.key.isdigit():
|
|
193
|
+
self.key_type = "pid"
|
|
194
|
+
if " " in self.key:
|
|
195
|
+
self.key_type = "fullname"
|
|
196
|
+
|
|
188
197
|
def check_cv(self):
|
|
189
198
|
if self.key_type is not None:
|
|
190
199
|
self._cv = False
|
gismap/sources/models.py
CHANGED
gismap/sources/multi.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from dataclasses import dataclass, field
|
|
2
|
-
from bof.fuzz import
|
|
2
|
+
from bof.fuzz import jit_square_factors
|
|
3
|
+
from bof.feature_extraction import CountVectorizer
|
|
3
4
|
import numpy as np
|
|
4
5
|
|
|
5
6
|
from gismap.sources.models import Publication, Author
|
|
@@ -52,8 +53,13 @@ class SourcedAuthor(Author):
|
|
|
52
53
|
def get_publications(self, clean=True, selector=None):
|
|
53
54
|
if selector is None:
|
|
54
55
|
selector = []
|
|
56
|
+
if not isinstance(selector, list):
|
|
57
|
+
selector = [selector]
|
|
55
58
|
res = {
|
|
56
|
-
p.key: p
|
|
59
|
+
p.key: p
|
|
60
|
+
for a in self.sources
|
|
61
|
+
for p in a.get_publications()
|
|
62
|
+
if all(f(p) for f in selector)
|
|
57
63
|
}
|
|
58
64
|
if clean:
|
|
59
65
|
regroup_authors({self.key: self}, res)
|
|
@@ -132,7 +138,9 @@ def regroup_authors(auth_dict, pub_dict):
|
|
|
132
138
|
}
|
|
133
139
|
|
|
134
140
|
for pub in pub_dict.values():
|
|
135
|
-
pub.authors = [
|
|
141
|
+
pub.authors = [
|
|
142
|
+
redirection.get(a.key, redirection.get(a.name, a)) for a in pub.authors
|
|
143
|
+
]
|
|
136
144
|
|
|
137
145
|
|
|
138
146
|
def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
|
|
@@ -153,20 +161,22 @@ def regroup_publications(pub_dict, threshold=85, length_impact=0.05, n_range=5):
|
|
|
153
161
|
:class:`dict`
|
|
154
162
|
Unified publications.
|
|
155
163
|
"""
|
|
164
|
+
if len(pub_dict) == 0:
|
|
165
|
+
return dict()
|
|
156
166
|
pub_list = [p for p in pub_dict.values()]
|
|
157
167
|
res = dict()
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
168
|
+
vectorizer = CountVectorizer(n_range=n_range)
|
|
169
|
+
x = vectorizer.fit_transform([p.title for p in pub_list])
|
|
170
|
+
y = x.T.tocsr()
|
|
171
|
+
jc_matrix = jit_square_factors(
|
|
172
|
+
x.indices, x.indptr, y.indices, y.indptr, len(pub_list), length_impact
|
|
173
|
+
)
|
|
174
|
+
done = np.zeros(len(pub_list), dtype=bool)
|
|
175
|
+
for i, paper in enumerate(pub_list):
|
|
176
|
+
if done[i]:
|
|
177
|
+
continue
|
|
178
|
+
locs = np.where(jc_matrix[i, :] > threshold)[0]
|
|
179
|
+
pub = SourcedPublication.from_sources([pub_list[i] for i in locs])
|
|
180
|
+
res[pub.key] = pub
|
|
181
|
+
done[locs] = True
|
|
172
182
|
return res
|
gismap/utils/requests.py
CHANGED
|
@@ -13,7 +13,7 @@ session.headers.update(
|
|
|
13
13
|
)
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
def get(url, params=None, n_trials=10):
|
|
16
|
+
def get(url, params=None, n_trials=10, verify=True):
|
|
17
17
|
"""
|
|
18
18
|
Parameters
|
|
19
19
|
----------
|
|
@@ -21,6 +21,8 @@ def get(url, params=None, n_trials=10):
|
|
|
21
21
|
Entry point to fetch.
|
|
22
22
|
params: :class:`dict`, optional
|
|
23
23
|
Get arguments (appended to URL).
|
|
24
|
+
verify: :class:`bool`, optional
|
|
25
|
+
Verify certificates.
|
|
24
26
|
|
|
25
27
|
Returns
|
|
26
28
|
-------
|
|
@@ -29,7 +31,7 @@ def get(url, params=None, n_trials=10):
|
|
|
29
31
|
"""
|
|
30
32
|
for attempt in range(n_trials):
|
|
31
33
|
try:
|
|
32
|
-
r = session.get(url, params=params)
|
|
34
|
+
r = session.get(url, params=params, verify=verify)
|
|
33
35
|
if r.status_code == 429:
|
|
34
36
|
try:
|
|
35
37
|
t = int(r.headers["Retry-After"])
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gismap
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: GisMap leverages DBLP and HAL databases to provide cartography tools for you and your lab.
|
|
5
5
|
Project-URL: Repository, https://github.com/balouf/gismap
|
|
6
6
|
Project-URL: Documentation, https://balouf.github.io/gismap
|
|
7
7
|
Author-email: Fabien Mathieu <fabien.mathieu@normalesup.org>
|
|
@@ -10,8 +10,11 @@ License-Expression: MIT
|
|
|
10
10
|
License-File: AUTHORS.md
|
|
11
11
|
Requires-Python: >=3.10
|
|
12
12
|
Requires-Dist: bof>=0.3.5
|
|
13
|
+
Requires-Dist: distinctipy>=1.3.4
|
|
14
|
+
Requires-Dist: domonic>=0.9.13
|
|
13
15
|
Requires-Dist: gismo>=0.5.2
|
|
14
16
|
Requires-Dist: ipykernel>=6.30.1
|
|
17
|
+
Requires-Dist: ipywidgets>=8.1.8
|
|
15
18
|
Requires-Dist: tqdm>=4.67.1
|
|
16
19
|
Description-Content-Type: text/markdown
|
|
17
20
|
|
|
@@ -19,12 +22,13 @@ Description-Content-Type: text/markdown
|
|
|
19
22
|
|
|
20
23
|
|
|
21
24
|
[](https://pypi.python.org/pypi/gismap)
|
|
25
|
+
[](https://mybinder.org/v2/gh/balouf/gismap/HEAD?urlpath=%2Fdoc%2Ftree%2Fbinder%2Finteractive.ipynb)
|
|
22
26
|
[](https://github.com/balouf/gismap/actions?query=workflow%3Abuild)
|
|
23
27
|
[](https://github.com/balouf/gismap/actions?query=workflow%3Adocs)
|
|
24
28
|
[](https://opensource.org/licenses/MIT)
|
|
25
29
|
[](https://codecov.io/gh/balouf/gismap/tree/main)
|
|
26
30
|
|
|
27
|
-
|
|
31
|
+
GisMap leverages DBLP and HAL databases to provide cartography tools for you and your lab.
|
|
28
32
|
|
|
29
33
|
- Free software: MIT
|
|
30
34
|
- Documentation: <https://balouf.github.io/gismap/>.
|
|
@@ -37,15 +41,27 @@ GISMAP leverages DBLP and HAL databases to provide cartography tools for you and
|
|
|
37
41
|
- Automatically keeps track of a Lab/Department members and publications.
|
|
38
42
|
- Builds interactive collaboration graphs.
|
|
39
43
|
|
|
44
|
+
## Test GisMap online!
|
|
45
|
+
|
|
46
|
+
Don't want to install GisMap on your computer (yet)? No worries, you can play with it using https://mybinder.org/.
|
|
47
|
+
|
|
48
|
+
For example:
|
|
49
|
+
|
|
50
|
+
- [A simple interface to display and save collaboration graphs](https://mybinder.org/v2/gh/balouf/gismap/HEAD?urlpath=%2Fdoc%2Ftree%2Fbinder%2Finteractive.ipynb)
|
|
51
|
+
- [Tutorial: Making LabMaps](https://mybinder.org/v2/gh/balouf/gismap/HEAD?urlpath=%2Fdoc%2Ftree%2Fdocs%2Ftutorials%2Flab_tutorial.ipynb)
|
|
52
|
+
- [Tutorial: Making EgoMaps](https://mybinder.org/v2/gh/balouf/gismap/HEAD?urlpath=%2Fdoc%2Ftree%2Fdocs%2Ftutorials%2Fegomap.ipynb)
|
|
53
|
+
- [Jupyter Lab instance with GisMap installed](https://mybinder.org/v2/gh/balouf/gismap/HEAD)
|
|
54
|
+
|
|
55
|
+
|
|
40
56
|
## Quickstart
|
|
41
57
|
|
|
42
|
-
Install
|
|
58
|
+
Install GisMap:
|
|
43
59
|
|
|
44
60
|
```console
|
|
45
61
|
$ pip install gismap
|
|
46
62
|
```
|
|
47
63
|
|
|
48
|
-
Use
|
|
64
|
+
Use GisMap to produce a collaboration graph (HTML):
|
|
49
65
|
|
|
50
66
|
```pycon
|
|
51
67
|
>>> from gismap.sources.hal import HAL
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
gismap/__init__.py,sha256=Zk5ZXwC-MBREnngJh3V9WUtxLSAUKhkyJhV6kUjtxLE,800
|
|
2
|
+
gismap/author.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
gismap/gismap.py,sha256=h0hwdogXGFqerm-5ZPeT-irPn91pCcQRjiHThXsRzEk,19
|
|
4
|
+
gismap/gismo.py,sha256=oDAryl4XQzHE0tUmOWC-3G1n_zUgTeykPL-JWSDYwe0,6307
|
|
5
|
+
gismap/search.py,sha256=nsUoDsFGeEtvCZ0dB7ooRPC_6qsazkiWx_oM7dHdNV4,4932
|
|
6
|
+
gismap/gisgraphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
gismap/gisgraphs/builder.py,sha256=La01_OczsSfZ1hu8sq6rdQN_TelBatN8wP3xLF7TwIg,3234
|
|
8
|
+
gismap/gisgraphs/graph.py,sha256=RuUuWdPudrbrDyoGMj2V9SuJGkrsClL_4_4tUMptsjk,7028
|
|
9
|
+
gismap/gisgraphs/groups.py,sha256=1E-7Xrv0uDw2SgqwtdjgeRLVBLaC7agUrrVics4jVLs,2405
|
|
10
|
+
gismap/gisgraphs/js.py,sha256=Gbz5nMWORabZkgIdyZAe1sMlnwJZ9jy7sLrx0vYStzI,6283
|
|
11
|
+
gismap/gisgraphs/options.py,sha256=lmUSnfSwrZQyJpGGs16JUGDIQNcJeX4Y0tA8cyC0nuM,817
|
|
12
|
+
gismap/gisgraphs/style.py,sha256=sXNUnv690kxiJiRQZ7lv4iKKrsxMqAfblheJbqesd48,4653
|
|
13
|
+
gismap/gisgraphs/widget.py,sha256=s7W_8N4f2palM0ChVFYyr9ImcQoMBiEw83MIO4HSm6c,4542
|
|
14
|
+
gismap/lab/__init__.py,sha256=ifyZqI9BpC5NRlMfSmJ671tnKWJDoXbo18iDoE-VR1s,181
|
|
15
|
+
gismap/lab/egomap.py,sha256=RabRJSWJ0xrG67l012En0rbi7ukr4R2lR0hc_K7Xp0o,1211
|
|
16
|
+
gismap/lab/expansion.py,sha256=CMUsXqo-shRyb_MiuPRL5-ZgaitxAxjfbSY_fvzi_1E,6236
|
|
17
|
+
gismap/lab/filters.py,sha256=pG_g2POQXMbyUUw0aXOaeyiGBbiSc7M2NzxLCTQrALk,1875
|
|
18
|
+
gismap/lab/lab_author.py,sha256=XwSXvioHDreZWcaWioGW4rjU2zZN10o89ilyfOsWV90,4497
|
|
19
|
+
gismap/lab/labmap.py,sha256=w3dFCyDHM-hEwHEQzrQA8GEOs8juu8F-f18gbufiOe8,5782
|
|
20
|
+
gismap/lab_examples/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
gismap/lab_examples/cedric.py,sha256=AjgYy5dhzqh3vDsr9ia_hbtSc9_2Ic238rmJO198FMM,1764
|
|
22
|
+
gismap/lab_examples/lincs.py,sha256=-mIVMGQMrtCtJ3N-oCU8j4Ko9mDuhEPB_pA0gaIw4QA,1126
|
|
23
|
+
gismap/lab_examples/lip6.py,sha256=K32Jqe3-o99QYI--akmwBDFAWKgq0HFEk_psC4akR60,1740
|
|
24
|
+
gismap/lab_examples/toulouse.py,sha256=OUKrK0uefn4uvW74qMsF792un203z3OUfKTquLPGBH4,2091
|
|
25
|
+
gismap/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
26
|
+
gismap/sources/dblp.py,sha256=eVd1u09BH-0TgAD3dXn78zsW5Er69mE_vKxPeGDaBw0,4834
|
|
27
|
+
gismap/sources/hal.py,sha256=tZkeDWob4p5fccBRXC10G3kf2rnVAVzwdkf3swjvl0c,10306
|
|
28
|
+
gismap/sources/models.py,sha256=XlNrQWTF-DQbfIFaSLPsgWPN-c79_0rfr_2jDasgukM,713
|
|
29
|
+
gismap/sources/multi.py,sha256=7aiYuExiCoU_5GCwi_ufhesy44HsAh9lNFx_J444YJs,4690
|
|
30
|
+
gismap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
gismap/utils/common.py,sha256=nx1f60yNwFpl1oz08h-R5o0xK9CbJv9tmYLDk61dwYA,2898
|
|
32
|
+
gismap/utils/logger.py,sha256=1YALIaNYKTqeIyyCnYxzvZTK7x4FTSfYYl5CP9IMw8E,86
|
|
33
|
+
gismap/utils/requests.py,sha256=nPnTh-lfo8cXiCeEhzZJ2AMo0odDtx4slPN1rTE4H_E,1384
|
|
34
|
+
gismap/utils/text.py,sha256=1_9DlduAYh7Nz-yAg-MaCTmdKbPPmuIY20bb87t7JAQ,3810
|
|
35
|
+
gismap-0.3.0.dist-info/METADATA,sha256=BWpxgm1dncPB8ISiMH95WY_Dyn_AyerfVrHIR6X1VYA,3469
|
|
36
|
+
gismap-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
37
|
+
gismap-0.3.0.dist-info/licenses/AUTHORS.md,sha256=oDR4mptVUBMq0WKIpt19Km1Bdfz3cO2NAOVgwVfTO8g,131
|
|
38
|
+
gismap-0.3.0.dist-info/RECORD,,
|