pub-analyzer 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pub-analyzer might be problematic. Click here for more details.
- pub_analyzer/css/editor.tcss +60 -0
- pub_analyzer/css/report.tcss +15 -6
- pub_analyzer/internal/limiter.py +34 -0
- pub_analyzer/internal/render.py +17 -44
- pub_analyzer/internal/report.py +60 -29
- pub_analyzer/internal/templates/author_report.typ +556 -0
- pub_analyzer/main.py +4 -7
- pub_analyzer/models/institution.py +3 -3
- pub_analyzer/widgets/author/tables.py +2 -1
- pub_analyzer/widgets/body.py +19 -0
- pub_analyzer/widgets/common/__init__.py +2 -0
- pub_analyzer/widgets/common/label.py +36 -0
- pub_analyzer/widgets/report/core.py +10 -0
- pub_analyzer/widgets/report/editor.py +80 -0
- pub_analyzer/widgets/report/export.py +2 -4
- pub_analyzer/widgets/report/work.py +145 -6
- pub_analyzer/widgets/search/__init__.py +2 -2
- pub_analyzer/widgets/search/results.py +2 -12
- {pub_analyzer-0.4.3.dist-info → pub_analyzer-0.5.1.dist-info}/METADATA +7 -8
- {pub_analyzer-0.4.3.dist-info → pub_analyzer-0.5.1.dist-info}/RECORD +23 -23
- {pub_analyzer-0.4.3.dist-info → pub_analyzer-0.5.1.dist-info}/WHEEL +1 -1
- pub_analyzer/internal/templates/author/author_summary.typ +0 -112
- pub_analyzer/internal/templates/author/report.typ +0 -91
- pub_analyzer/internal/templates/author/sources.typ +0 -22
- pub_analyzer/internal/templates/author/works.typ +0 -154
- pub_analyzer/internal/templates/author/works_extended.typ +0 -109
- {pub_analyzer-0.4.3.dist-info → pub_analyzer-0.5.1.dist-info}/LICENSE +0 -0
- {pub_analyzer-0.4.3.dist-info → pub_analyzer-0.5.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/* COLORS */
|
|
2
|
+
$bg-main-color: white;
|
|
3
|
+
$bg-secondary-color: #e5e7eb;
|
|
4
|
+
$bg-secondary-color-accent: #d1d5db;
|
|
5
|
+
$text-primary-color: black;
|
|
6
|
+
|
|
7
|
+
$bg-main-color-darken: #1e293b;
|
|
8
|
+
$bg-secondary-color-darken: #0f172a;
|
|
9
|
+
$text-primary-color-darken: black;
|
|
10
|
+
|
|
11
|
+
$primary-color: #b91c1c;
|
|
12
|
+
$primary-color-accent: #991b1b;
|
|
13
|
+
$primary-color-highlight: #dc2626;
|
|
14
|
+
|
|
15
|
+
TextEditor {
|
|
16
|
+
#dialog {
|
|
17
|
+
margin: 0 10;
|
|
18
|
+
min-height: 10vh;
|
|
19
|
+
max-height: 60vh;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
#text-editor-container {
|
|
23
|
+
height: 1fr;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
TextArea{
|
|
27
|
+
height: auto;
|
|
28
|
+
padding: 1 3;
|
|
29
|
+
|
|
30
|
+
background: $bg-main-color;
|
|
31
|
+
border: none;
|
|
32
|
+
|
|
33
|
+
.text-area--cursor {
|
|
34
|
+
background: $primary-color;
|
|
35
|
+
}
|
|
36
|
+
.text-area--cursor-gutter {
|
|
37
|
+
color: $bg-main-color;
|
|
38
|
+
background: $primary-color-accent;
|
|
39
|
+
}
|
|
40
|
+
.text-area--cursor-line {
|
|
41
|
+
background: $bg-main-color;
|
|
42
|
+
}
|
|
43
|
+
.text-area--matching-bracket {
|
|
44
|
+
background: $primary-color-highlight 30%;
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
#actions-buttons {
|
|
50
|
+
height: 3;
|
|
51
|
+
margin-top: 1;
|
|
52
|
+
margin-bottom: 2;
|
|
53
|
+
|
|
54
|
+
align: center middle;
|
|
55
|
+
|
|
56
|
+
Button {
|
|
57
|
+
margin: 0 5;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
}
|
pub_analyzer/css/report.tcss
CHANGED
|
@@ -67,29 +67,29 @@ LoadReportWidget .button-container {
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
/* Export Report Pane */
|
|
70
|
-
|
|
70
|
+
#export-form {
|
|
71
71
|
height: auto;
|
|
72
72
|
}
|
|
73
73
|
|
|
74
|
-
|
|
74
|
+
.export-form-input-container {
|
|
75
75
|
height: auto;
|
|
76
76
|
margin-bottom: 2;
|
|
77
77
|
}
|
|
78
78
|
|
|
79
|
-
|
|
79
|
+
.export-form-label {
|
|
80
80
|
width: 25vw;
|
|
81
81
|
border-bottom: solid $text-primary-color;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
-
|
|
84
|
+
.file-selector-container {
|
|
85
85
|
height: 3;
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
.export-form-input {
|
|
89
89
|
width: 50vw;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
|
-
|
|
92
|
+
.export-form-buttons {
|
|
93
93
|
align: center middle;
|
|
94
94
|
height: 3;
|
|
95
95
|
}
|
|
@@ -113,6 +113,15 @@ WorkModal #dialog .abstract {
|
|
|
113
113
|
padding: 1 2;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
|
+
WorkModal TabPane EditWidget {
|
|
117
|
+
height: 3;
|
|
118
|
+
margin-top: 1;
|
|
119
|
+
|
|
120
|
+
Horizontal {
|
|
121
|
+
align: center middle;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
116
125
|
WorkModal #dialog #tables-container {
|
|
117
126
|
margin: 1 0;
|
|
118
127
|
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Rate limiter module."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RateLimiter:
|
|
8
|
+
"""Rate limiter."""
|
|
9
|
+
|
|
10
|
+
def __init__(self, rate: int, per_second: float = 1.0) -> None:
|
|
11
|
+
self.rate = rate
|
|
12
|
+
self.per = per_second
|
|
13
|
+
self._tokens = float(rate)
|
|
14
|
+
self._updated_at = time.monotonic()
|
|
15
|
+
self._lock = asyncio.Lock()
|
|
16
|
+
|
|
17
|
+
async def acquire(self) -> None:
|
|
18
|
+
"""Wait until new token is available."""
|
|
19
|
+
while True:
|
|
20
|
+
async with self._lock:
|
|
21
|
+
now = time.monotonic()
|
|
22
|
+
elapsed = now - self._updated_at
|
|
23
|
+
if elapsed > 0:
|
|
24
|
+
self._tokens = min(self.rate, self._tokens + elapsed * (self.rate / self.per))
|
|
25
|
+
self._updated_at = now
|
|
26
|
+
|
|
27
|
+
if self._tokens >= 1.0:
|
|
28
|
+
self._tokens -= 1.0
|
|
29
|
+
return
|
|
30
|
+
|
|
31
|
+
missing = 1.0 - self._tokens
|
|
32
|
+
wait_time = missing * (self.per / self.rate)
|
|
33
|
+
|
|
34
|
+
await asyncio.sleep(wait_time)
|
pub_analyzer/internal/render.py
CHANGED
|
"""Render reports."""

import pathlib
import time
from importlib.metadata import version

import typst
from textual import log

from pub_analyzer.models.report import AuthorReport, InstitutionReport


def render_report(report: AuthorReport | InstitutionReport, file_path: pathlib.Path | None) -> bytes | None:
    """Render report to PDF.

    The report model is serialized to JSON and handed to the Typst compiler
    through ``sys_inputs``, so no intermediate ``.typ`` file needs to be written.

    Args:
        report: Report Model.
        file_path: Path to save the compiled file.

    Returns:
        PDF bytes or None if output file path is defined.

    Raises:
        SyntaxError: If typst compiler syntax error.
        NotImplementedError: If an institution report is requested (not supported yet).
        TypeError: If ``report`` is neither an author nor an institution report.
    """
    if isinstance(report, AuthorReport):
        templates_path = pathlib.Path(__file__).parent.resolve().joinpath("templates")
        typst_file = templates_path / "author_report.typ"
    elif isinstance(report, InstitutionReport):
        raise NotImplementedError
    else:
        # Defensive: without this, an unexpected type would surface later as an
        # obscure NameError on the unbound ``typst_file`` local.
        raise TypeError(f"Unsupported report type: {type(report).__name__}")

    sys_inputs = {"report": report.model_dump_json(by_alias=True), "version": version("pub-analyzer")}

    start_time = time.time()
    if file_path:
        result = typst.compile(input=typst_file, output=file_path, sys_inputs=sys_inputs)
    else:
        result = typst.compile(input=typst_file, sys_inputs=sys_inputs)

    log.info(f"Typst compile time: {round((time.time() - start_time), 2)} seconds.")
    return result
|
pub_analyzer/internal/report.py
CHANGED
|
@@ -6,8 +6,10 @@ from typing import Any, NewType
|
|
|
6
6
|
|
|
7
7
|
import httpx
|
|
8
8
|
from pydantic import TypeAdapter
|
|
9
|
+
from textual import log
|
|
9
10
|
|
|
10
11
|
from pub_analyzer.internal import identifier
|
|
12
|
+
from pub_analyzer.internal.limiter import RateLimiter
|
|
11
13
|
from pub_analyzer.models.author import Author, AuthorOpenAlexKey, AuthorResult, DehydratedAuthor
|
|
12
14
|
from pub_analyzer.models.institution import DehydratedInstitution, Institution, InstitutionOpenAlexKey, InstitutionResult
|
|
13
15
|
from pub_analyzer.models.report import (
|
|
@@ -30,6 +32,10 @@ FromDate = NewType("FromDate", datetime.datetime)
|
|
|
30
32
|
ToDate = NewType("ToDate", datetime.datetime)
|
|
31
33
|
"""DateTime marker for works published up to this date."""
|
|
32
34
|
|
|
35
|
+
REQUEST_RATE_PER_SECOND = 8
|
|
36
|
+
"""The OpenAlex API requires a maximum of 10 requests per second. We limit this to 8 per second."""
|
|
37
|
+
PER_PAGE_SIZE = 100
|
|
38
|
+
|
|
33
39
|
|
|
34
40
|
def _get_author_profiles_keys(
|
|
35
41
|
author: Author, extra_profiles: list[Author | AuthorResult | DehydratedAuthor] | None
|
|
@@ -138,10 +144,17 @@ def _get_valid_works(works: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
|
138
144
|
In response, we have chosen to exclude such works at this stage, thus avoiding
|
|
139
145
|
the need to handle exceptions within the Model validators.
|
|
140
146
|
"""
|
|
141
|
-
|
|
147
|
+
valid_works = []
|
|
148
|
+
for work in works:
|
|
149
|
+
if work["title"] is not None:
|
|
150
|
+
valid_works.append(_add_work_abstract(work))
|
|
151
|
+
else:
|
|
152
|
+
log.warning(f"Discarded work: {work['id']}")
|
|
153
|
+
|
|
154
|
+
return valid_works
|
|
142
155
|
|
|
143
156
|
|
|
144
|
-
async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
|
|
157
|
+
async def _get_works(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> list[Work]:
|
|
145
158
|
"""Get all works given a URL.
|
|
146
159
|
|
|
147
160
|
Iterate over all pages of the URL
|
|
@@ -156,7 +169,8 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
|
|
|
156
169
|
Raises:
|
|
157
170
|
httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
|
|
158
171
|
"""
|
|
159
|
-
|
|
172
|
+
await limiter.acquire()
|
|
173
|
+
response = await client.get(url=url, follow_redirects=True)
|
|
160
174
|
response.raise_for_status()
|
|
161
175
|
|
|
162
176
|
json_response = response.json()
|
|
@@ -166,13 +180,14 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
|
|
|
166
180
|
works_data = list(_get_valid_works(json_response["results"]))
|
|
167
181
|
|
|
168
182
|
for page_number in range(1, page_count):
|
|
169
|
-
|
|
183
|
+
await limiter.acquire()
|
|
184
|
+
page_result = (await client.get(url + f"&page={page_number + 1}", follow_redirects=True)).json()
|
|
170
185
|
works_data.extend(_get_valid_works(page_result["results"]))
|
|
171
186
|
|
|
172
187
|
return TypeAdapter(list[Work]).validate_python(works_data)
|
|
173
188
|
|
|
174
189
|
|
|
175
|
-
async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
|
|
190
|
+
async def _get_source(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> Source:
|
|
176
191
|
"""Get source given a URL.
|
|
177
192
|
|
|
178
193
|
Args:
|
|
@@ -185,10 +200,18 @@ async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
|
|
|
185
200
|
Raises:
|
|
186
201
|
httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
|
|
187
202
|
"""
|
|
188
|
-
|
|
203
|
+
await limiter.acquire()
|
|
204
|
+
response = await client.get(url=url, follow_redirects=True)
|
|
189
205
|
response.raise_for_status()
|
|
190
206
|
|
|
191
|
-
|
|
207
|
+
json_response = response.json()
|
|
208
|
+
hp_url = json_response["homepage_url"]
|
|
209
|
+
if isinstance(hp_url, str):
|
|
210
|
+
if not hp_url.startswith(("http", "https")):
|
|
211
|
+
json_response["homepage_url"] = None
|
|
212
|
+
log.warning(f"Discarted source homepage url: {url}")
|
|
213
|
+
|
|
214
|
+
return Source(**json_response)
|
|
192
215
|
|
|
193
216
|
|
|
194
217
|
async def make_author_report(
|
|
@@ -222,13 +245,12 @@ async def make_author_report(
|
|
|
222
245
|
|
|
223
246
|
pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
|
|
224
247
|
pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
|
|
225
|
-
url =
|
|
226
|
-
f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
|
|
227
|
-
)
|
|
248
|
+
url = f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
|
|
228
249
|
|
|
229
|
-
|
|
250
|
+
limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
|
|
251
|
+
async with httpx.AsyncClient(http2=True, timeout=None) as client:
|
|
230
252
|
# Getting all the author works.
|
|
231
|
-
author_works = await _get_works(client, url)
|
|
253
|
+
author_works = await _get_works(client, url, limiter)
|
|
232
254
|
|
|
233
255
|
# Extra filters
|
|
234
256
|
cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
|
|
@@ -242,12 +264,13 @@ async def make_author_report(
|
|
|
242
264
|
dehydrated_sources: list[DehydratedSource] = []
|
|
243
265
|
|
|
244
266
|
# Getting all works that have cited the author.
|
|
245
|
-
|
|
267
|
+
author_works_count = len(author_works)
|
|
268
|
+
for idx_work, author_work in enumerate(author_works, 1):
|
|
246
269
|
work_id = identifier.get_work_id(author_work)
|
|
270
|
+
log.info(f"[{work_id}] Work [{idx_work}/{author_works_count}]")
|
|
271
|
+
|
|
247
272
|
work_authors = _get_authors_list(authorships=author_work.authorships)
|
|
248
|
-
cited_by_api_url =
|
|
249
|
-
f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
|
|
250
|
-
)
|
|
273
|
+
cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
|
|
251
274
|
|
|
252
275
|
# Adding the type of OpenAccess in the counter.
|
|
253
276
|
open_access_summary.add_oa_type(author_work.open_access.oa_status)
|
|
@@ -264,7 +287,7 @@ async def make_author_report(
|
|
|
264
287
|
if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
|
|
265
288
|
dehydrated_sources.append(location.source)
|
|
266
289
|
|
|
267
|
-
cited_by_works = await _get_works(client, cited_by_api_url)
|
|
290
|
+
cited_by_works = await _get_works(client, cited_by_api_url, limiter)
|
|
268
291
|
cited_by: list[CitationReport] = []
|
|
269
292
|
work_citation_summary = CitationSummary()
|
|
270
293
|
for cited_by_work in cited_by_works:
|
|
@@ -281,10 +304,13 @@ async def make_author_report(
|
|
|
281
304
|
|
|
282
305
|
# Get sources full info.
|
|
283
306
|
sources: list[Source] = []
|
|
284
|
-
|
|
307
|
+
sources_count = len(dehydrated_sources)
|
|
308
|
+
for idx, dehydrated_source in enumerate(dehydrated_sources, 1):
|
|
285
309
|
source_id = identifier.get_source_id(dehydrated_source)
|
|
286
310
|
source_url = f"https://api.openalex.org/sources/{source_id}"
|
|
287
|
-
|
|
311
|
+
|
|
312
|
+
log.info(f"Getting Sources... [{idx}/{sources_count}]")
|
|
313
|
+
sources.append(await _get_source(client, source_url, limiter))
|
|
288
314
|
|
|
289
315
|
# Sort sources by h_index
|
|
290
316
|
sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
|
|
@@ -331,11 +357,12 @@ async def make_institution_report(
|
|
|
331
357
|
|
|
332
358
|
pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
|
|
333
359
|
pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
|
|
334
|
-
url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
|
|
360
|
+
url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
|
|
335
361
|
|
|
336
|
-
|
|
362
|
+
limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
|
|
363
|
+
async with httpx.AsyncClient(http2=True, timeout=None) as client:
|
|
337
364
|
# Getting all the institution works.
|
|
338
|
-
institution_works = await _get_works(client=client, url=url)
|
|
365
|
+
institution_works = await _get_works(client=client, url=url, limiter=limiter)
|
|
339
366
|
|
|
340
367
|
# Extra filters
|
|
341
368
|
cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
|
|
@@ -349,12 +376,13 @@ async def make_institution_report(
|
|
|
349
376
|
dehydrated_sources: list[DehydratedSource] = []
|
|
350
377
|
|
|
351
378
|
# Getting all works that have cited a work.
|
|
352
|
-
|
|
379
|
+
institution_works_count = len(institution_works)
|
|
380
|
+
for idx_work, institution_work in enumerate(institution_works, 1):
|
|
353
381
|
work_id = identifier.get_work_id(institution_work)
|
|
382
|
+
log.info(f"[{work_id}] Work [{idx_work}/{institution_works_count}]")
|
|
383
|
+
|
|
354
384
|
work_authors = _get_authors_list(authorships=institution_work.authorships)
|
|
355
|
-
cited_by_api_url =
|
|
356
|
-
f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
|
|
357
|
-
)
|
|
385
|
+
cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
|
|
358
386
|
|
|
359
387
|
# Adding the type of OpenAccess in the counter.
|
|
360
388
|
open_access_summary.add_oa_type(institution_work.open_access.oa_status)
|
|
@@ -371,7 +399,7 @@ async def make_institution_report(
|
|
|
371
399
|
if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
|
|
372
400
|
dehydrated_sources.append(location.source)
|
|
373
401
|
|
|
374
|
-
cited_by_works = await _get_works(client, cited_by_api_url)
|
|
402
|
+
cited_by_works = await _get_works(client, cited_by_api_url, limiter)
|
|
375
403
|
cited_by: list[CitationReport] = []
|
|
376
404
|
work_citation_summary = CitationSummary()
|
|
377
405
|
for cited_by_work in cited_by_works:
|
|
@@ -388,10 +416,13 @@ async def make_institution_report(
|
|
|
388
416
|
|
|
389
417
|
# Get sources full info.
|
|
390
418
|
sources: list[Source] = []
|
|
391
|
-
|
|
419
|
+
sources_count = len(dehydrated_sources)
|
|
420
|
+
for idx, dehydrated_source in enumerate(dehydrated_sources, 1):
|
|
392
421
|
source_id = identifier.get_source_id(dehydrated_source)
|
|
393
422
|
source_url = f"https://api.openalex.org/sources/{source_id}"
|
|
394
|
-
|
|
423
|
+
|
|
424
|
+
log.debug(f"[{work_id}] Getting Sources... [{idx}/{sources_count}]")
|
|
425
|
+
sources.append(await _get_source(client, source_url, limiter))
|
|
395
426
|
|
|
396
427
|
# Sort sources by h_index
|
|
397
428
|
sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
|