pub-analyzer 0.5.0__tar.gz → 0.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pub-analyzer might be problematic.

Files changed (68)
  1. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/PKG-INFO +2 -2
  2. pub_analyzer-0.5.1/pub_analyzer/internal/limiter.py +34 -0
  3. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/report.py +24 -20
  4. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/templates/author_report.typ +2 -2
  5. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/core.py +10 -0
  6. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pyproject.toml +2 -2
  7. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/LICENSE +0 -0
  8. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/README.md +0 -0
  9. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/__init__.py +0 -0
  10. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/body.tcss +0 -0
  11. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/buttons.tcss +0 -0
  12. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/checkbox.tcss +0 -0
  13. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/collapsible.tcss +0 -0
  14. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/datatable.tcss +0 -0
  15. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/editor.tcss +0 -0
  16. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/main.tcss +0 -0
  17. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/report.tcss +0 -0
  18. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/search.tcss +0 -0
  19. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/summary.tcss +0 -0
  20. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/tabs.tcss +0 -0
  21. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/css/tree.tcss +0 -0
  22. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/__init__.py +0 -0
  23. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/identifier.py +0 -0
  24. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/render.py +0 -0
  25. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/main.py +0 -0
  26. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/__init__.py +0 -0
  27. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/author.py +0 -0
  28. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/concept.py +0 -0
  29. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/institution.py +0 -0
  30. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/report.py +0 -0
  31. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/source.py +0 -0
  32. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/topic.py +0 -0
  33. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/models/work.py +0 -0
  34. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/__init__.py +0 -0
  35. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/author/__init__.py +0 -0
  36. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/author/cards.py +0 -0
  37. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/author/core.py +0 -0
  38. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/author/tables.py +0 -0
  39. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/body.py +0 -0
  40. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/__init__.py +0 -0
  41. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/card.py +0 -0
  42. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/filesystem.py +0 -0
  43. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/filters.py +0 -0
  44. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/input.py +0 -0
  45. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/label.py +0 -0
  46. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/modal.py +0 -0
  47. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/selector.py +0 -0
  48. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/common/summary.py +0 -0
  49. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/institution/__init__.py +0 -0
  50. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/institution/cards.py +0 -0
  51. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/institution/core.py +0 -0
  52. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/institution/tables.py +0 -0
  53. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/__init__.py +0 -0
  54. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/author.py +0 -0
  55. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/cards.py +0 -0
  56. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/concept.py +0 -0
  57. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/editor.py +0 -0
  58. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/export.py +0 -0
  59. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/grants.py +0 -0
  60. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/institution.py +0 -0
  61. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/locations.py +0 -0
  62. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/source.py +0 -0
  63. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/topic.py +0 -0
  64. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/work.py +0 -0
  65. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/search/__init__.py +0 -0
  66. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/search/core.py +0 -0
  67. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/search/results.py +0 -0
  68. {pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/sidebar.py +0 -0

{pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: pub-analyzer
-Version: 0.5.0
+Version: 0.5.1
 Summary: A text user interface, written in python, which automates the generation of scientific production reports using OpenAlex
 License: MIT
 Author: Alejandro Gaspar
@@ -22,7 +22,7 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Typing :: Typed
-Requires-Dist: httpx (==0.28.1)
+Requires-Dist: httpx[http2] (==0.28.1)
 Requires-Dist: pydantic (==2.11.7)
 Requires-Dist: textual (==0.85.2)
 Requires-Dist: typst (==0.13.2)

pub_analyzer-0.5.1/pub_analyzer/internal/limiter.py
@@ -0,0 +1,34 @@
+"""Rate limiter module."""
+
+import asyncio
+import time
+
+
+class RateLimiter:
+    """Rate limiter."""
+
+    def __init__(self, rate: int, per_second: float = 1.0) -> None:
+        self.rate = rate
+        self.per = per_second
+        self._tokens = float(rate)
+        self._updated_at = time.monotonic()
+        self._lock = asyncio.Lock()
+
+    async def acquire(self) -> None:
+        """Wait until new token is available."""
+        while True:
+            async with self._lock:
+                now = time.monotonic()
+                elapsed = now - self._updated_at
+                if elapsed > 0:
+                    self._tokens = min(self.rate, self._tokens + elapsed * (self.rate / self.per))
+                    self._updated_at = now
+
+                if self._tokens >= 1.0:
+                    self._tokens -= 1.0
+                    return
+
+                missing = 1.0 - self._tokens
+                wait_time = missing * (self.per / self.rate)
+
+            await asyncio.sleep(wait_time)
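
The new limiter is a small token bucket: the bucket holds up to rate tokens, refills continuously at rate tokens per per_second seconds, and acquire() either consumes a token or sleeps until one has refilled. A minimal usage sketch, assuming limiter.py is importable as shipped; the worker coroutine and the request count below are illustrative only, not part of the package:

import asyncio

from pub_analyzer.internal.limiter import RateLimiter


async def worker(limiter: RateLimiter, idx: int) -> None:
    # Each task blocks here until the bucket has a free token.
    await limiter.acquire()
    print(f"request {idx} released at t={asyncio.get_running_loop().time():.2f}")


async def main() -> None:
    # 8 tokens per second: the first 8 tasks run immediately, the remaining
    # 12 are spaced out at roughly 8 per second.
    limiter = RateLimiter(rate=8, per_second=1.0)
    await asyncio.gather(*(worker(limiter, i) for i in range(20)))


asyncio.run(main())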

{pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/report.py
@@ -9,6 +9,7 @@ from pydantic import TypeAdapter
 from textual import log
 
 from pub_analyzer.internal import identifier
+from pub_analyzer.internal.limiter import RateLimiter
 from pub_analyzer.models.author import Author, AuthorOpenAlexKey, AuthorResult, DehydratedAuthor
 from pub_analyzer.models.institution import DehydratedInstitution, Institution, InstitutionOpenAlexKey, InstitutionResult
 from pub_analyzer.models.report import (
@@ -31,6 +32,10 @@ FromDate = NewType("FromDate", datetime.datetime)
 ToDate = NewType("ToDate", datetime.datetime)
 """DateTime marker for works published up to this date."""
 
+REQUEST_RATE_PER_SECOND = 8
+"""The OpenAlex API requires a maximum of 10 requests per second. We limit this to 8 per second."""
+PER_PAGE_SIZE = 100
+
 
 def _get_author_profiles_keys(
     author: Author, extra_profiles: list[Author | AuthorResult | DehydratedAuthor] | None
@@ -149,7 +154,7 @@ def _get_valid_works(works: list[dict[str, Any]]) -> list[dict[str, Any]]:
     return valid_works
 
 
-async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
+async def _get_works(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> list[Work]:
     """Get all works given a URL.
 
     Iterate over all pages of the URL
@@ -164,6 +169,7 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
     Raises:
         httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
     """
+    await limiter.acquire()
     response = await client.get(url=url, follow_redirects=True)
     response.raise_for_status()
 
@@ -174,13 +180,14 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
     works_data = list(_get_valid_works(json_response["results"]))
 
     for page_number in range(1, page_count):
+        await limiter.acquire()
         page_result = (await client.get(url + f"&page={page_number + 1}", follow_redirects=True)).json()
         works_data.extend(_get_valid_works(page_result["results"]))
 
     return TypeAdapter(list[Work]).validate_python(works_data)
 
 
-async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
+async def _get_source(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> Source:
     """Get source given a URL.
 
     Args:
@@ -193,6 +200,7 @@ async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
     Raises:
         httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
     """
+    await limiter.acquire()
     response = await client.get(url=url, follow_redirects=True)
     response.raise_for_status()
 
@@ -237,13 +245,12 @@ async def make_author_report(
 
     pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
     pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
-    url = (
-        f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
-    )
+    url = f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
-    async with httpx.AsyncClient() as client:
+    limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
+    async with httpx.AsyncClient(http2=True, timeout=None) as client:
         # Getting all the author works.
-        author_works = await _get_works(client, url)
+        author_works = await _get_works(client, url, limiter)
 
         # Extra filters
         cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -263,9 +270,7 @@
             log.info(f"[{work_id}] Work [{idx_work}/{author_works_count}]")
 
             work_authors = _get_authors_list(authorships=author_work.authorships)
-            cited_by_api_url = (
-                f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
-            )
+            cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
             # Adding the type of OpenAccess in the counter.
             open_access_summary.add_oa_type(author_work.open_access.oa_status)
@@ -282,7 +287,7 @@
                 if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
                     dehydrated_sources.append(location.source)
 
-            cited_by_works = await _get_works(client, cited_by_api_url)
+            cited_by_works = await _get_works(client, cited_by_api_url, limiter)
             cited_by: list[CitationReport] = []
             work_citation_summary = CitationSummary()
             for cited_by_work in cited_by_works:
@@ -305,7 +310,7 @@
             source_url = f"https://api.openalex.org/sources/{source_id}"
 
             log.info(f"Getting Sources... [{idx}/{sources_count}]")
-            sources.append(await _get_source(client, source_url))
+            sources.append(await _get_source(client, source_url, limiter))
 
         # Sort sources by h_index
         sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
@@ -352,11 +357,12 @@ async def make_institution_report(
 
     pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
    pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
-    url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
+    url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
-    async with httpx.AsyncClient() as client:
+    limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
+    async with httpx.AsyncClient(http2=True, timeout=None) as client:
         # Getting all the institution works.
-        institution_works = await _get_works(client=client, url=url)
+        institution_works = await _get_works(client=client, url=url, limiter=limiter)
 
         # Extra filters
         cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -376,9 +382,7 @@
             log.info(f"[{work_id}] Work [{idx_work}/{institution_works_count}]")
 
             work_authors = _get_authors_list(authorships=institution_work.authorships)
-            cited_by_api_url = (
-                f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
-            )
+            cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
 
             # Adding the type of OpenAccess in the counter.
             open_access_summary.add_oa_type(institution_work.open_access.oa_status)
@@ -395,7 +399,7 @@
                 if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
                     dehydrated_sources.append(location.source)
 
-            cited_by_works = await _get_works(client, cited_by_api_url)
+            cited_by_works = await _get_works(client, cited_by_api_url, limiter)
             cited_by: list[CitationReport] = []
             work_citation_summary = CitationSummary()
             for cited_by_work in cited_by_works:
@@ -418,7 +422,7 @@
             source_url = f"https://api.openalex.org/sources/{source_id}"
 
             log.debug(f"[{work_id}] Getting Sources... [{idx}/{sources_count}]")
-            sources.append(await _get_source(client, source_url))
+            sources.append(await _get_source(client, source_url, limiter))
 
         # Sort sources by h_index
         sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
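
Taken together, these changes bound how hard a report run can hit OpenAlex: every request first takes a token from a shared RateLimiter (8 per second, under the documented 10-per-second limit), results come back 100 per page via per-page instead of the API default of 25, and a single AsyncClient reuses connections over HTTP/2. A condensed, hypothetical sketch of the same pattern, assuming the usual OpenAlex meta.count/results response shape; the fetch_all_pages helper and the author id are illustrative, not the package's code:

import asyncio
import math

import httpx

from pub_analyzer.internal.limiter import RateLimiter

PER_PAGE_SIZE = 100


async def fetch_all_pages(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> list[dict]:
    # One token per HTTP request, the first page included.
    await limiter.acquire()
    first_page = (await client.get(url, follow_redirects=True)).json()
    results: list[dict] = list(first_page["results"])
    page_count = math.ceil(first_page["meta"]["count"] / PER_PAGE_SIZE)
    for page_number in range(1, page_count):
        await limiter.acquire()
        next_page = (await client.get(url + f"&page={page_number + 1}", follow_redirects=True)).json()
        results.extend(next_page["results"])
    return results


async def main() -> None:
    # One limiter and one HTTP/2 client shared by every request in the run.
    limiter = RateLimiter(rate=8, per_second=1.0)
    async with httpx.AsyncClient(http2=True, timeout=None) as client:
        url = f"https://api.openalex.org/works?filter=author.id:A0000000000&per-page={PER_PAGE_SIZE}"
        print(len(await fetch_all_pages(client, url, limiter)))


asyncio.run(main())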

{pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/internal/templates/author_report.typ
@@ -191,7 +191,7 @@
     [*Year*], [*Works count*], [*Cited by count*],
 
     // Content
-    ..author.at("counts_by_year").slice(0, 8).map(
+    ..author.at("counts_by_year").slice(0, calc.min(author.at("counts_by_year").len(), 8)).map(
       ((year, works_count, cited_by_count)) => (
         table.cell([#year]),
         table.cell([#works_count]),
@@ -216,7 +216,7 @@
       x-label: none, y-label: none,
       {
         plot.add((
-          ..author.at("counts_by_year").slice(0, 8).map(
+          ..author.at("counts_by_year").slice(0, calc.min(author.at("counts_by_year").len(), 8)).map(
             ((year, works_count, cited_by_count)) => (
               (year, cited_by_count)
             )

{pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pub_analyzer/widgets/report/core.py
@@ -3,6 +3,7 @@
 import datetime
 import pathlib
 from enum import Enum
+from time import time
 from typing import ClassVar
 
 import httpx
@@ -105,7 +106,9 @@ class CreateReportWidget(Static):
     async def mount_report(self) -> None:
         """Mount report."""
         try:
+            start = time()
             report_widget = await self.make_report()
+            elapsed = time() - start
         except httpx.HTTPStatusError as exc:
             self.query_one(LoadingIndicator).display = False
             status_error = f"HTTP Exception for url: {exc.request.url}. Status code: {exc.response.status_code}"
@@ -117,6 +120,13 @@
             )
             return None
 
+        self.app.notify(
+            title="Report created!",
+            message=f"Elapsed {elapsed:.2f}s",
+            severity="information",
+            timeout=20.0,
+        )
+
         container = self.query_one(Container)
         await container.mount(report_widget)
 

{pub_analyzer-0.5.0 → pub_analyzer-0.5.1}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "pub-analyzer"
-version = "0.5.0"
+version = "0.5.1"
 description = "A text user interface, written in python, which automates the generation of scientific production reports using OpenAlex"
 
 authors = ["Alejandro Gaspar <alejandro@gaspar.land>"]
@@ -42,7 +42,7 @@ pub-analyzer = "pub_analyzer.main:run"
 python = "^3.10"
 
 textual = "0.85.2"
-httpx = "0.28.1"
+httpx = {version = "0.28.1", extras = ["http2"]}
 pydantic = "2.11.7"
 
 typst = "0.13.2"
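
The new http2 extra pulls in the optional h2 dependency that httpx needs before http2=True can be used; httpx never upgrades to HTTP/2 implicitly, which is why report.py also enables it explicitly on the client. A quick illustrative check, not part of the package:

import httpx

# The httpx[http2] extra maps to the optional `h2` package. HTTP/2 also has
# to be requested with http2=True, as the new AsyncClient(http2=True, ...)
# calls in report.py do.
try:
    import h2  # noqa: F401
except ImportError:
    raise SystemExit("h2 is missing - install with: pip install 'httpx[http2]'")

with httpx.Client(http2=True) as client:
    response = client.get("https://api.openalex.org/works?per-page=1")
    print(response.http_version)  # "HTTP/2" when the server negotiates it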