pub-analyzer 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pub-analyzer might be problematic. Click here for more details.

@@ -0,0 +1,60 @@
1
+ /* COLORS */
2
+ $bg-main-color: white;
3
+ $bg-secondary-color: #e5e7eb;
4
+ $bg-secondary-color-accent: #d1d5db;
5
+ $text-primary-color: black;
6
+
7
+ $bg-main-color-darken: #1e293b;
8
+ $bg-secondary-color-darken: #0f172a;
9
+ $text-primary-color-darken: black;
10
+
11
+ $primary-color: #b91c1c;
12
+ $primary-color-accent: #991b1b;
13
+ $primary-color-highlight: #dc2626;
14
+
15
+ TextEditor {
16
+ #dialog {
17
+ margin: 0 10;
18
+ min-height: 10vh;
19
+ max-height: 60vh;
20
+ }
21
+
22
+ #text-editor-container {
23
+ height: 1fr;
24
+ }
25
+
26
+ TextArea{
27
+ height: auto;
28
+ padding: 1 3;
29
+
30
+ background: $bg-main-color;
31
+ border: none;
32
+
33
+ .text-area--cursor {
34
+ background: $primary-color;
35
+ }
36
+ .text-area--cursor-gutter {
37
+ color: $bg-main-color;
38
+ background: $primary-color-accent;
39
+ }
40
+ .text-area--cursor-line {
41
+ background: $bg-main-color;
42
+ }
43
+ .text-area--matching-bracket {
44
+ background: $primary-color-highlight 30%;
45
+ }
46
+
47
+ }
48
+
49
+ #actions-buttons {
50
+ height: 3;
51
+ margin-top: 1;
52
+ margin-bottom: 2;
53
+
54
+ align: center middle;
55
+
56
+ Button {
57
+ margin: 0 5;
58
+ }
59
+ }
60
+ }
@@ -67,29 +67,29 @@ LoadReportWidget .button-container {
67
67
  }
68
68
 
69
69
  /* Export Report Pane */
70
- ExportReportPane #export-form {
70
+ #export-form {
71
71
  height: auto;
72
72
  }
73
73
 
74
- ExportReportPane .export-form-input-container {
74
+ .export-form-input-container {
75
75
  height: auto;
76
76
  margin-bottom: 2;
77
77
  }
78
78
 
79
- ExportReportPane .export-form-label {
79
+ .export-form-label {
80
80
  width: 25vw;
81
81
  border-bottom: solid $text-primary-color;
82
82
  }
83
83
 
84
- ExportReportPane .file-selector-container {
84
+ .file-selector-container {
85
85
  height: 3;
86
86
  }
87
87
 
88
- ExportReportPane .export-form-input {
88
+ .export-form-input {
89
89
  width: 50vw;
90
90
  }
91
91
 
92
- ExportReportPane .export-form-buttons {
92
+ .export-form-buttons {
93
93
  align: center middle;
94
94
  height: 3;
95
95
  }
@@ -113,6 +113,15 @@ WorkModal #dialog .abstract {
113
113
  padding: 1 2;
114
114
  }
115
115
 
116
+ WorkModal TabPane EditWidget {
117
+ height: 3;
118
+ margin-top: 1;
119
+
120
+ Horizontal {
121
+ align: center middle;
122
+ }
123
+ }
124
+
116
125
  WorkModal #dialog #tables-container {
117
126
  margin: 1 0;
118
127
  }
@@ -0,0 +1,34 @@
1
+ """Rate limiter module."""
2
+
3
+ import asyncio
4
+ import time
5
+
6
+
7
+ class RateLimiter:
8
+ """Rate limiter."""
9
+
10
+ def __init__(self, rate: int, per_second: float = 1.0) -> None:
11
+ self.rate = rate
12
+ self.per = per_second
13
+ self._tokens = float(rate)
14
+ self._updated_at = time.monotonic()
15
+ self._lock = asyncio.Lock()
16
+
17
+ async def acquire(self) -> None:
18
+ """Wait until new token is available."""
19
+ while True:
20
+ async with self._lock:
21
+ now = time.monotonic()
22
+ elapsed = now - self._updated_at
23
+ if elapsed > 0:
24
+ self._tokens = min(self.rate, self._tokens + elapsed * (self.rate / self.per))
25
+ self._updated_at = now
26
+
27
+ if self._tokens >= 1.0:
28
+ self._tokens -= 1.0
29
+ return
30
+
31
+ missing = 1.0 - self._tokens
32
+ wait_time = missing * (self.per / self.rate)
33
+
34
+ await asyncio.sleep(wait_time)
@@ -1,68 +1,41 @@
1
1
  """Render reports."""
2
2
 
3
3
  import pathlib
4
+ import time
4
5
  from importlib.metadata import version
5
6
 
6
7
  import typst
7
- from jinja2 import Environment, FileSystemLoader
8
+ from textual import log
8
9
 
9
10
  from pub_analyzer.models.report import AuthorReport, InstitutionReport
10
11
 
11
12
 
12
- async def render_template_report(report: AuthorReport | InstitutionReport) -> str:
13
- """Render report template.
14
-
15
- Render the report to typst format using the templates.
13
def render_report(report: AuthorReport | InstitutionReport, file_path: pathlib.Path | None) -> bytes | None:
    """Render report to PDF.

    Compile the bundled typst template, injecting the report data through
    ``sys_inputs`` so no intermediate file has to be written.

    Args:
        report: Report Model.
        file_path: Path to save the compiled file. If ``None``, the PDF
            bytes are returned instead of being written to disk.

    Returns:
        PDF bytes or None if output file path is defined.

    Raises:
        NotImplementedError: If report is `InstitutionReport` type.
        TypeError: If report is not a supported report type.
        SyntaxError: If typst compiler syntax error.
    """
    if isinstance(report, InstitutionReport):
        raise NotImplementedError
    if not isinstance(report, AuthorReport):
        # Fail fast with a clear error instead of hitting an unbound
        # `typst_file` NameError further down.
        raise TypeError(f"Unsupported report type: {type(report).__name__}")

    templates_path = pathlib.Path(__file__).parent.resolve().joinpath("templates")
    typst_file = templates_path / "author_report.typ"

    # The template reads the report as JSON plus the package version via typst sys inputs.
    sys_inputs = {"report": report.model_dump_json(by_alias=True), "version": version("pub-analyzer")}

    start_time = time.time()
    if file_path:
        result = typst.compile(input=typst_file, output=file_path, sys_inputs=sys_inputs)
    else:
        result = typst.compile(input=typst_file, sys_inputs=sys_inputs)

    log.info(f"Typst compile time: {round((time.time() - start_time), 2)} seconds.")
    return result
@@ -6,8 +6,10 @@ from typing import Any, NewType
6
6
 
7
7
  import httpx
8
8
  from pydantic import TypeAdapter
9
+ from textual import log
9
10
 
10
11
  from pub_analyzer.internal import identifier
12
+ from pub_analyzer.internal.limiter import RateLimiter
11
13
  from pub_analyzer.models.author import Author, AuthorOpenAlexKey, AuthorResult, DehydratedAuthor
12
14
  from pub_analyzer.models.institution import DehydratedInstitution, Institution, InstitutionOpenAlexKey, InstitutionResult
13
15
  from pub_analyzer.models.report import (
@@ -30,6 +32,10 @@ FromDate = NewType("FromDate", datetime.datetime)
30
32
  ToDate = NewType("ToDate", datetime.datetime)
31
33
  """DateTime marker for works published up to this date."""
32
34
 
35
+ REQUEST_RATE_PER_SECOND = 8
36
+ """The OpenAlex API requires a maximum of 10 requests per second. We limit this to 8 per second."""
37
+ PER_PAGE_SIZE = 100
38
+
33
39
 
34
40
  def _get_author_profiles_keys(
35
41
  author: Author, extra_profiles: list[Author | AuthorResult | DehydratedAuthor] | None
@@ -138,10 +144,17 @@ def _get_valid_works(works: list[dict[str, Any]]) -> list[dict[str, Any]]:
138
144
  In response, we have chosen to exclude such works at this stage, thus avoiding
139
145
  the need to handle exceptions within the Model validators.
140
146
  """
141
- return [_add_work_abstract(work) for work in works if work["title"] is not None]
147
+ valid_works = []
148
+ for work in works:
149
+ if work["title"] is not None:
150
+ valid_works.append(_add_work_abstract(work))
151
+ else:
152
+ log.warning(f"Discarded work: {work['id']}")
153
+
154
+ return valid_works
142
155
 
143
156
 
144
- async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
157
+ async def _get_works(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> list[Work]:
145
158
  """Get all works given a URL.
146
159
 
147
160
  Iterate over all pages of the URL
@@ -156,7 +169,8 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
156
169
  Raises:
157
170
  httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
158
171
  """
159
- response = await client.get(url=url)
172
+ await limiter.acquire()
173
+ response = await client.get(url=url, follow_redirects=True)
160
174
  response.raise_for_status()
161
175
 
162
176
  json_response = response.json()
@@ -166,13 +180,14 @@ async def _get_works(client: httpx.AsyncClient, url: str) -> list[Work]:
166
180
  works_data = list(_get_valid_works(json_response["results"]))
167
181
 
168
182
  for page_number in range(1, page_count):
169
- page_result = (await client.get(url + f"&page={page_number + 1}")).json()
183
+ await limiter.acquire()
184
+ page_result = (await client.get(url + f"&page={page_number + 1}", follow_redirects=True)).json()
170
185
  works_data.extend(_get_valid_works(page_result["results"]))
171
186
 
172
187
  return TypeAdapter(list[Work]).validate_python(works_data)
173
188
 
174
189
 
175
- async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
190
+ async def _get_source(client: httpx.AsyncClient, url: str, limiter: RateLimiter) -> Source:
176
191
  """Get source given a URL.
177
192
 
178
193
  Args:
@@ -185,10 +200,18 @@ async def _get_source(client: httpx.AsyncClient, url: str) -> Source:
185
200
  Raises:
186
201
  httpx.HTTPStatusError: One response from OpenAlex API had an error HTTP status of 4xx or 5xx.
187
202
  """
188
- response = await client.get(url=url)
203
+ await limiter.acquire()
204
+ response = await client.get(url=url, follow_redirects=True)
189
205
  response.raise_for_status()
190
206
 
191
- return Source(**response.json())
207
+ json_response = response.json()
208
+ hp_url = json_response["homepage_url"]
209
+ if isinstance(hp_url, str):
210
+ if not hp_url.startswith(("http", "https")):
211
+ json_response["homepage_url"] = None
212
+ log.warning(f"Discarted source homepage url: {url}")
213
+
214
+ return Source(**json_response)
192
215
 
193
216
 
194
217
  async def make_author_report(
@@ -222,13 +245,12 @@ async def make_author_report(
222
245
 
223
246
  pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
224
247
  pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
225
- url = (
226
- f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
227
- )
248
+ url = f"https://api.openalex.org/works?filter=author.id:{profiles_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
228
249
 
229
- async with httpx.AsyncClient() as client:
250
+ limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
251
+ async with httpx.AsyncClient(http2=True, timeout=None) as client:
230
252
  # Getting all the author works.
231
- author_works = await _get_works(client, url)
253
+ author_works = await _get_works(client, url, limiter)
232
254
 
233
255
  # Extra filters
234
256
  cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -242,12 +264,13 @@ async def make_author_report(
242
264
  dehydrated_sources: list[DehydratedSource] = []
243
265
 
244
266
  # Getting all works that have cited the author.
245
- for author_work in author_works:
267
+ author_works_count = len(author_works)
268
+ for idx_work, author_work in enumerate(author_works, 1):
246
269
  work_id = identifier.get_work_id(author_work)
270
+ log.info(f"[{work_id}] Work [{idx_work}/{author_works_count}]")
271
+
247
272
  work_authors = _get_authors_list(authorships=author_work.authorships)
248
- cited_by_api_url = (
249
- f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
250
- )
273
+ cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
251
274
 
252
275
  # Adding the type of OpenAccess in the counter.
253
276
  open_access_summary.add_oa_type(author_work.open_access.oa_status)
@@ -264,7 +287,7 @@ async def make_author_report(
264
287
  if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
265
288
  dehydrated_sources.append(location.source)
266
289
 
267
- cited_by_works = await _get_works(client, cited_by_api_url)
290
+ cited_by_works = await _get_works(client, cited_by_api_url, limiter)
268
291
  cited_by: list[CitationReport] = []
269
292
  work_citation_summary = CitationSummary()
270
293
  for cited_by_work in cited_by_works:
@@ -281,10 +304,13 @@ async def make_author_report(
281
304
 
282
305
  # Get sources full info.
283
306
  sources: list[Source] = []
284
- for dehydrated_source in dehydrated_sources:
307
+ sources_count = len(dehydrated_sources)
308
+ for idx, dehydrated_source in enumerate(dehydrated_sources, 1):
285
309
  source_id = identifier.get_source_id(dehydrated_source)
286
310
  source_url = f"https://api.openalex.org/sources/{source_id}"
287
- sources.append(await _get_source(client, source_url))
311
+
312
+ log.info(f"Getting Sources... [{idx}/{sources_count}]")
313
+ sources.append(await _get_source(client, source_url, limiter))
288
314
 
289
315
  # Sort sources by h_index
290
316
  sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)
@@ -331,11 +357,12 @@ async def make_institution_report(
331
357
 
332
358
  pub_from_filter = f",from_publication_date:{pub_from_date:%Y-%m-%d}" if pub_from_date else ""
333
359
  pub_to_filter = f",to_publication_date:{pub_to_date:%Y-%m-%d}" if pub_to_date else ""
334
- url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date"
360
+ url = f"https://api.openalex.org/works?filter=institutions.id:{institution_query_parameter}{pub_from_filter}{pub_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
335
361
 
336
- async with httpx.AsyncClient() as client:
362
+ limiter = RateLimiter(rate=REQUEST_RATE_PER_SECOND, per_second=1.0)
363
+ async with httpx.AsyncClient(http2=True, timeout=None) as client:
337
364
  # Getting all the institution works.
338
- institution_works = await _get_works(client=client, url=url)
365
+ institution_works = await _get_works(client=client, url=url, limiter=limiter)
339
366
 
340
367
  # Extra filters
341
368
  cited_from_filter = f",from_publication_date:{cited_from_date:%Y-%m-%d}" if cited_from_date else ""
@@ -349,12 +376,13 @@ async def make_institution_report(
349
376
  dehydrated_sources: list[DehydratedSource] = []
350
377
 
351
378
  # Getting all works that have cited a work.
352
- for institution_work in institution_works:
379
+ institution_works_count = len(institution_works)
380
+ for idx_work, institution_work in enumerate(institution_works, 1):
353
381
  work_id = identifier.get_work_id(institution_work)
382
+ log.info(f"[{work_id}] Work [{idx_work}/{institution_works_count}]")
383
+
354
384
  work_authors = _get_authors_list(authorships=institution_work.authorships)
355
- cited_by_api_url = (
356
- f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date"
357
- )
385
+ cited_by_api_url = f"https://api.openalex.org/works?filter=cites:{work_id}{cited_from_filter}{cited_to_filter}&sort=publication_date&per-page={PER_PAGE_SIZE}"
358
386
 
359
387
  # Adding the type of OpenAccess in the counter.
360
388
  open_access_summary.add_oa_type(institution_work.open_access.oa_status)
@@ -371,7 +399,7 @@ async def make_institution_report(
371
399
  if location.source and not any(source.id == location.source.id for source in dehydrated_sources):
372
400
  dehydrated_sources.append(location.source)
373
401
 
374
- cited_by_works = await _get_works(client, cited_by_api_url)
402
+ cited_by_works = await _get_works(client, cited_by_api_url, limiter)
375
403
  cited_by: list[CitationReport] = []
376
404
  work_citation_summary = CitationSummary()
377
405
  for cited_by_work in cited_by_works:
@@ -388,10 +416,13 @@ async def make_institution_report(
388
416
 
389
417
  # Get sources full info.
390
418
  sources: list[Source] = []
391
- for dehydrated_source in dehydrated_sources:
419
+ sources_count = len(dehydrated_sources)
420
+ for idx, dehydrated_source in enumerate(dehydrated_sources, 1):
392
421
  source_id = identifier.get_source_id(dehydrated_source)
393
422
  source_url = f"https://api.openalex.org/sources/{source_id}"
394
- sources.append(await _get_source(client, source_url))
423
+
424
+ log.debug(f"[{work_id}] Getting Sources... [{idx}/{sources_count}]")
425
+ sources.append(await _get_source(client, source_url, limiter))
395
426
 
396
427
  # Sort sources by h_index
397
428
  sources_sorted = sorted(sources, key=lambda source: source.summary_stats.two_yr_mean_citedness, reverse=True)