PyPI - paperscraper - Versions diffs - 0.2.5__tar.gz → 0.2.6__tar.gz - Mend - Supply Chain Defender

paperscraper 0.2.5 (tar.gz) → 0.2.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38)

{paperscraper-0.2.5 → paperscraper-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: paperscraper
-Version: 0.2.5
+Version: 0.2.6
 Summary: paperscraper: Package to scrape papers.
 Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
 Author: Jannis Born, Matteo Manica

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """Initialize the module."""
 __name__ = "paperscraper"
-__version__ = "0.2.5"
+__version__ = "0.2.6"
 import logging
 import os

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/arxiv.py RENAMED Viewed

@@ -1,7 +1,9 @@
 from typing import Dict, List, Union
-import arxiv
 import pandas as pd
+from tqdm import tqdm
+import arxiv
 from ..utils import dump_papers
 from .utils import get_query_from_keywords
@@ -10,6 +12,7 @@ arxiv_field_mapper = {
     "published": "date",
     "journal_ref": "journal",
     "summary": "abstract",
+    "entry_id": "doi",
 }
 # Authors, date, and journal fields need specific processing
@@ -17,6 +20,7 @@ process_fields = {
     "authors": lambda authors: ", ".join([a.name for a in authors]),
     "date": lambda date: date.strftime("%Y-%m-%d"),
     "journal": lambda j: j if j is not None else "",
+    "doi": lambda entry_id: f"10.48550/arXiv.{entry_id.split('/')[-1].split('v')[0]}",
 }
@@ -57,9 +61,9 @@ def get_arxiv_papers(
                     arxiv_field_mapper.get(key, key), lambda x: x
                 )(value)
                 for key, value in vars(paper).items()
-                if arxiv_field_mapper.get(key, key) in fields
+                if arxiv_field_mapper.get(key, key) in fields and key != "doi"
             }
-            for paper in results
+            for paper in tqdm(results, desc=f"Processing {query}")
         ]
     )
     return processed
@@ -70,7 +74,7 @@ def get_and_dump_arxiv_papers(
     output_filepath: str,
     fields: List = ["title", "authors", "date", "abstract", "journal", "doi"],
     *args,
-    **kwargs
+    **kwargs,
 ):
     """
     Combines get_arxiv_papers and dump_papers.

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: paperscraper
-Version: 0.2.5
+Version: 0.2.6
 Summary: paperscraper: Package to scrape papers.
 Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
 Author: Jannis Born, Matteo Manica

{paperscraper-0.2.5 → paperscraper-0.2.6}/LICENSE RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/README.md RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/utils.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/biorxiv.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/chemrxiv.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/medrxiv.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/chemrxiv_api.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/utils.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/journal_if.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/load_dumps.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pdf.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/plotting.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/postprocessing.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/pubmed.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/utils.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/scholar.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/server_dumps/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/utils.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/__init__.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_api.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_query.py RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/not-zip-safe RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/requires.txt RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/top_level.txt RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/setup.cfg RENAMED Viewed

File without changes

{paperscraper-0.2.5 → paperscraper-0.2.6}/setup.py RENAMED Viewed

File without changes