pub-analyzer 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pub_analyzer/internal/limiter.py +34 -0
- pub_analyzer/internal/report.py +24 -20
- pub_analyzer/internal/templates/author_report.typ +2 -2
- pub_analyzer/widgets/report/core.py +10 -0
- {pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info}/METADATA +2 -2
- {pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info}/RECORD +9 -8
- {pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info}/LICENSE +0 -0
- {pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info}/WHEEL +0 -0
- {pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info}/entry_points.txt +0 -0
pub_analyzer/internal/limiter.py
ADDED
@@ -0,0 +1,34 @@
+"""Rate limiter module."""
+
+import asyncio
+import time
+
+
+class RateLimiter:
+    """Rate limiter."""
+
+    def __init__(self, rate: int, per_second: float = 1.0) -> None:
+        self.rate = rate
+        self.per = per_second
+        self._tokens = float(rate)
+        self._updated_at = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    async def acquire(self) -> None:
+        """Wait until new token is available."""
+        while True:
+            async with self._lock:
+                now = time.monotonic()
+                elapsed = now - self._updated_at
+                if elapsed > 0:
+                    self._tokens = min(self.rate, self._tokens + elapsed * (self.rate / self.per))
+                    self._updated_at = now
+
+                if self._tokens >= 1.0:
+                    self._tokens -= 1.0
+                    return
+
+                missing = 1.0 - self._tokens
+                wait_time = missing * (self.per / self.rate)
+
+            await asyncio.sleep(wait_time)
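The new module implements a classic token bucket: the bucket starts full with `rate` tokens, refills continuously at `rate / per_second` tokens per second, and `acquire()` computes the sleep needed for the next full token, releasing the lock before sleeping so other coroutines can make progress; the `while True` loop then re-checks the balance, so waiters that wake together cannot over-draw the bucket. A minimal usage sketch (not part of the package; assumes the wheel is installed):

    import asyncio
    import time

    from pub_analyzer.internal.limiter import RateLimiter

    async def main() -> None:
        limiter = RateLimiter(rate=8, per_second=1.0)  # the values report.py uses
        start = time.monotonic()
        for i in range(16):
            await limiter.acquire()  # the first 8 pass immediately, the rest are paced
            print(f"token {i + 1} acquired at t={time.monotonic() - start:.2f}s")

    asyncio.run(main())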
pub_analyzer/internal/report.py
CHANGED
@@ -9,6 +9,7 @@ from pydantic import TypeAdapter
 from textual import log
 
 from pub_analyzer.internal import identifier
+from pub_analyzer.internal.limiter import RateLimiter
 from pub_analyzer.models.author import Author, AuthorOpenAlexKey, AuthorResult, DehydratedAuthor
 from pub_analyzer.models.institution import DehydratedInstitution, Institution, InstitutionOpenAlexKey, InstitutionResult
 from pub_analyzer.models.report import (
@@ -31,6 +32,10 @@ FromDate = NewType("FromDate", datetime.datetime)
 ToDate = NewType("ToDate", datetime.datetime)
 """DateTime marker for works published up to this date."""
 
+REQUEST_RATE_PER_SECOND = 8
+"""The OpenAlex API allows a maximum of 10 requests per second. We limit this to 8 per second."""
+PER_PAGE_SIZE = 100
+
 
 def _get_author_profiles_keys(
     author: Author, extra_profiles: list[Author | AuthorResult | DehydratedAuthor] | None
@@ -149,7 +154,7 @@ def _get_valid_works(works: list[dict[str, Any]]) -> list[dict[str, Any]]:
     return valid_works
 
 
-async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
+async def _get_works(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> list[Work]:
     """Get all works given a URL.
 
     Iterate over all pages of the URL
@@ -164,6 +169,7 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
     Raises:
         httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
     """
+    await limiter.acquire()
    response = await client.get(url=url, follow_redirects=True)
    response.raise_for_status()
 
@@ -174,13 +180,14 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
     works_data = list(_get_valid_works(json_response["results"]))
 
     for page_number in range(1, page_count):
+        await limiter.acquire()
         page_result = (await client.get(url + f"&page={page_number + 1}", follow_redirects=True)).json()
         works_data.extend(_get_valid_works(page_result["results"]))
 
     return TypeAdapter(list[Work]).validate_python(works_data)
 
 
-async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
+async def _get_source(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> Source:
     """Get source given a URL.
 
     Args:
@@ -193,6 +200,7 @@ async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
     Raises:
         httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
     """
+    await limiter.acquire()
    response = await client.get(url=url, follow_redirects=True)
    response.raise_for_status()
 
@@ -237,13 +245,12 @@ async def make_author_report(
 
     pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
     pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
-    url = (
-        f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
-    )
+    url = f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
-
+    limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
+    async with httpx.AsyncClient(http2=True, timeout=None) as client:
         # Getting all the author works.
-        author_works = await _get_works(client, url)
+        author_works = await _get_works(client, url, limiter)
 
         # Extra filters
         cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -263,9 +270,7 @@ async def make_author_report(
         log.info(f"[{work_id}] Work [{idx_work}/{author_works_count}]")
 
         work_authors = _get_authors_list(authorships=author_work.authorships)
-        cited_by_api_url = (
-            f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
-        )
+        cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
         # Adding the type of OpenAccess in the counter.
         open_access_summary.add_oa_type(author_work.open_access.oa_status)
@@ -282,7 +287,7 @@
             if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
                 dehydrated_sources.append(location.source)
 
-        cited_by_works = await _get_works(client, cited_by_api_url)
+        cited_by_works = await _get_works(client, cited_by_api_url, limiter)
         cited_by: list[CitationReport] = []
         work_citation_summary = CitationSummary()
         for cited_by_work in cited_by_works:
@@ -305,7 +310,7 @@
         source_url = f"https://api.openalex.org/sources/{source_id}"
 
         log.info(f"Getting Sources... [{idx}/{sources_count}]")
-        sources.append(await _get_source(client, source_url))
+        sources.append(await _get_source(client, source_url, limiter))
 
     # Sort sources by h_index
     sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
@@ -352,11 +357,12 @@ async def make_institution_report(
 
     pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
     pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
-    url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
+    url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
-
+    limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
+    async with httpx.AsyncClient(http2=True, timeout=None) as client:
         # Getting all the institution works.
-        institution_works = await _get_works(client=client, url=url)
+        institution_works = await _get_works(client=client, url=url, limiter=limiter)
 
         # Extra filters
         cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -376,9 +382,7 @@ async def make_institution_report(
         log.info(f"[{work_id}] Work [{idx_work}/{institution_works_count}]")
 
         work_authors = _get_authors_list(authorships=institution_work.authorships)
-        cited_by_api_url = (
-            f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
-        )
+        cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
         # Adding the type of OpenAccess in the counter.
         open_access_summary.add_oa_type(institution_work.open_access.oa_status)
@@ -395,7 +399,7 @@
             if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
                 dehydrated_sources.append(location.source)
 
-        cited_by_works = await _get_works(client, cited_by_api_url)
+        cited_by_works = await _get_works(client, cited_by_api_url, limiter)
         cited_by: list[CitationReport] = []
         work_citation_summary = CitationSummary()
         for cited_by_work in cited_by_works:
@@ -418,7 +422,7 @@
         source_url = f"https://api.openalex.org/sources/{source_id}"
 
         log.debug(f"[{work_id}] Getting Sources... [{idx}/{sources_count}]")
-        sources.append(await _get_source(client, source_url))
+        sources.append(await _get_source(client, source_url, limiter))
 
     # Sort sources by h_index
     sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
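Taken together, the report.py changes trade request volume for predictable pacing: every OpenAlex call now waits on one shared RateLimiter, and `per-page={PER_PAGE_SIZE}` requests 100 results per page (per-page is a real OpenAlex query parameter), so far fewer paginated requests are needed. An illustrative sketch of the shared-limiter pattern with hypothetical URLs; unlike report.py, which awaits its requests sequentially, this version uses asyncio.gather to show that the limiter's lock also keeps concurrent acquisitions correct:

    import asyncio

    import httpx

    from pub_analyzer.internal.limiter import RateLimiter

    async def fetch(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> int:
        await limiter.acquire()  # paced to 8 acquisitions/second after the initial burst
        response = await client.get(url, follow_redirects=True)
        response.raise_for_status()
        return response.status_code

    async def main() -> None:
        limiter = RateLimiter(rate=8, per_second=1.0)
        async with httpx.AsyncClient(timeout=None) as client:  # http2=True needs httpx[http2]
            urls = [f"https://api.openalex.org/works?page={n}" for n in range(1, 4)]
            print(await asyncio.gather(*(fetch(client, url, limiter) for url in urls)))

    asyncio.run(main())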
pub_analyzer/internal/templates/author_report.typ
CHANGED
@@ -191,7 +191,7 @@
   [*Year*], [*Works count*], [*Cited by count*],
 
   // Content
-  ..author.at("counts_by_year").slice(0, 8).map(
+  ..author.at("counts_by_year").slice(0, calc.min(author.at("counts_by_year").len(), 8)).map(
     ((year, works_count, cited_by_count)) => (
       table.cell([#year]),
       table.cell([#works_count]),
@@ -216,7 +216,7 @@
   x-label: none, y-label: none,
   {
     plot.add((
-      ..author.at("counts_by_year").slice(0, 8).map(
+      ..author.at("counts_by_year").slice(0, calc.min(author.at("counts_by_year").len(), 8)).map(
        ((year, works_count, cited_by_count)) => (
          (year, cited_by_count)
        )
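The Typst change fixes a crash for authors with short publication histories: unlike Python slices, Typst's `array.slice` errors when the end index exceeds the array length, so `slice(0, 8)` failed whenever `counts_by_year` held fewer than eight entries, and `calc.min(...)` clamps the end index. A Python analogue of the fix (illustrative; the strict check emulates Typst's behavior, and the sample data is hypothetical):

    def strict_slice(items: list, start: int, end: int) -> list:
        # Mimic Typst's array.slice, which rejects out-of-bounds end indices.
        if end > len(items):
            raise IndexError(f"array index out of bounds (length: {len(items)})")
        return items[start:end]

    counts_by_year = [(2024, 2, 7), (2023, 4, 10)]   # only two years of data
    safe_end = min(len(counts_by_year), 8)           # the calc.min(...) clamp
    print(strict_slice(counts_by_year, 0, safe_end)) # no longer raises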
pub_analyzer/widgets/report/core.py
CHANGED
@@ -3,6 +3,7 @@
 import datetime
 import pathlib
 from enum import Enum
+from time import time
 from typing import ClassVar
 
 import httpx
@@ -105,7 +106,9 @@ class CreateReportWidget(Static):
     async def mount_report(self) -> None:
         """Mount report."""
         try:
+            start = time()
             report_widget = await self.make_report()
+            elapsed = time() - start
         except httpx.HTTPStatusError as exc:
             self.query_one(LoadingIndicator).display = False
             status_error = f"HTTP Exception for url: {exc.request.url}. Status code: {exc.response.status_code}"
@@ -117,6 +120,13 @@ class CreateReportWidget(Static):
             )
             return None
 
+        self.app.notify(
+            title="Report created!",
+            message=f"Elapsed {elapsed:.2f}s",
+            severity="information",
+            timeout=20.0,
+        )
+
         container = self.query_one(Container)
         await container.mount(report_widget)
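The core.py change is plain wall-clock timing around the awaited make_report() call, surfaced through Textual's built-in App.notify toast. A standalone sketch of the same measurement, with a stand-in coroutine in place of the real report builder:

    import asyncio
    from time import time

    async def make_report() -> str:
        await asyncio.sleep(0.5)  # stand-in for the real report generation
        return "report"

    async def main() -> None:
        start = time()
        report = await make_report()
        elapsed = time() - start
        # core.py forwards this through self.app.notify(...); here we just print it.
        print(f"Report created! Elapsed {elapsed:.2f}s")

    asyncio.run(main())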
pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pub-analyzer
-Version: 0.5.0
+Version: 0.5.1
 Summary: A text user interface, written in python, which automates the generation of scientific production reports using OpenAlex
 License: MIT
 Author: Alejandro Gaspar
@@ -22,7 +22,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
-Requires-Dist: httpx (==0.28.1)
+Requires-Dist: httpx[http2] (==0.28.1)
 Requires-Dist: pydantic (==2.11.7)
 Requires-Dist: textual (==0.85.2)
 Requires-Dist: typst (==0.13.2)
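The new [http2] extra matches the httpx.AsyncClient(http2=True) calls in report.py: httpx only ships HTTP/2 support when the optional h2 dependency is installed, and without it the client raises ImportError (in recent versions at client construction, in older ones at first request). A quick check, noting that the exact error text depends on the httpx/httpcore version:

    import httpx

    try:
        client = httpx.AsyncClient(http2=True)  # works only with `pip install httpx[http2]`
        print("HTTP/2 support available")
    except ImportError as exc:
        print(f"h2 missing: {exc}")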
pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info/RECORD
CHANGED
@@ -13,9 +13,10 @@ pub_analyzer/css/tabs.tcss,sha256=dS7y6ZZmo1Vw7Wqpx66-O-oE7zeqPE9reWqIhQ1KcZs,31
 pub_analyzer/css/tree.tcss,sha256=5BSabX9ZmRL3VTz0Gya2RRJnWrwdIF9cTf6dXj2R4kE,818
 pub_analyzer/internal/__init__.py,sha256=9aqrBJDedUiBO5kEO81kSAuPbOSFoaDZZK8w5NydPhs,22
 pub_analyzer/internal/identifier.py,sha256=LDYew25TLuwqJHmLg9iRNTURWynN27ZbTxTVGbuOUD0,2939
+pub_analyzer/internal/limiter.py,sha256=1YaVBSSG7IfFg0nhD_up21NNL_H2Q4qaIQTvZS674Vo,1002
 pub_analyzer/internal/render.py,sha256=uF1LsY39UkTpkTJgU4hyYnVv6b1MCQayubrPwrGW2DI,1271
-pub_analyzer/internal/report.py,sha256=
-pub_analyzer/internal/templates/author_report.typ,sha256=
+pub_analyzer/internal/report.py,sha256=RnX3EELW33ABwEu1W506_0q7gWQyF6Rcds-YAbXYlow,18046
+pub_analyzer/internal/templates/author_report.typ,sha256=XdqPmBptlC46vDORDFs-YaILehWh7lDuCo0cbyMPGHo,16927
 pub_analyzer/main.py,sha256=0iNj4cggG-HJ8FMODwZ67Yp3-GaFPw-gUEcSCCzwMcc,2332
 pub_analyzer/models/__init__.py,sha256=hvR6m379slQw7gSwnl_OFY21Ytv90mmmOe7bp8vZYkk,59
 pub_analyzer/models/author.py,sha256=NvFmvSsmnchz2lo7m69NE3kjLYP0CXICRAolnvcznW8,2118
@@ -48,7 +49,7 @@ pub_analyzer/widgets/report/__init__.py,sha256=oolRVss3JKaQHaQVDncjtxbLINRJ5Rd1u
 pub_analyzer/widgets/report/author.py,sha256=IEfRDfsA8jcmFwQQk1O-iuh8MKr4DbzBPpjoE8xECZA,1459
 pub_analyzer/widgets/report/cards.py,sha256=2jf9cpfzVFZO0I9b29bkNaVhENMnfL26etEpUG-NMk0,4854
 pub_analyzer/widgets/report/concept.py,sha256=xiGXy_RXO_XmdqnlePkOozYPmQrsDdqKPMRXHsZbDP0,1485
-pub_analyzer/widgets/report/core.py,sha256=
+pub_analyzer/widgets/report/core.py,sha256=Bgy_fK-IwGjoIidcr687xXsHzN3LEml-A3ykyXNeVW8,11704
 pub_analyzer/widgets/report/editor.py,sha256=WlhjNQCrqeot2rvV1266Vr8yDYJQLL1lJ1XY040UoJI,2768
 pub_analyzer/widgets/report/export.py,sha256=as2yM2FXsqgvMnF4KVWVuxboULXqJ62v7wzMYek23s4,4633
 pub_analyzer/widgets/report/grants.py,sha256=m183W6djVhucAuYs-EhjkHuA9heqpGwsW_iRouVQsns,1347
@@ -61,8 +62,8 @@ pub_analyzer/widgets/search/__init__.py,sha256=90L9IghqXD2jAWBKWK6-UeHLSVlci7D3_
 pub_analyzer/widgets/search/core.py,sha256=4NvowtBcrH1fmob9kuF7v9Tq3Nd99jzB2S7xaD8OYeI,3861
 pub_analyzer/widgets/search/results.py,sha256=3ko7zcToGp9MV-mzz_9uTJxSec7IozlIWDZe7QeRmj0,3703
 pub_analyzer/widgets/sidebar.py,sha256=XlIshlCVW5Bb3MXFPnU9is0qQrUrGdT6xlkKiYNEcAM,2704
-pub_analyzer-0.5.
-pub_analyzer-0.5.
-pub_analyzer-0.5.
-pub_analyzer-0.5.
-pub_analyzer-0.5.
+pub_analyzer-0.5.1.dist-info/LICENSE,sha256=OPopoEowTMKqIea8Kbxk3TKdCQ97YkLvIknjTHE5oCI,1080
+pub_analyzer-0.5.1.dist-info/METADATA,sha256=ebqDbZ41qqzCXNSHqmxabArEXoEcWsmx9BNOML_r8Mk,4547
+pub_analyzer-0.5.1.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+pub_analyzer-0.5.1.dist-info/entry_points.txt,sha256=mVb_gUNX_-aVWHlNKLjcMAS8YLgNnSq9JLRXVJGIF2c,54
+pub_analyzer-0.5.1.dist-info/RECORD,,
pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info/LICENSE
File without changes

pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info/WHEEL
File without changes

pub_analyzer-0.5.0.dist-info → pub_analyzer-0.5.1.dist-info/entry_points.txt
File without changes