paperscraper 0.2.5__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {paperscraper-0.2.5 → paperscraper-0.2.6}/PKG-INFO +1 -1
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/__init__.py +1 -1
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/arxiv.py +8 -4
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/PKG-INFO +1 -1
- {paperscraper-0.2.5 → paperscraper-0.2.6}/LICENSE +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/README.md +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/utils.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/biorxiv.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/chemrxiv.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/medrxiv.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/chemrxiv_api.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/utils.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/journal_if.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/load_dumps.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pdf.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/plotting.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/postprocessing.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/pubmed.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/utils.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/scholar.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/server_dumps/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/utils.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/__init__.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_api.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_query.py +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/SOURCES.txt +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/dependency_links.txt +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/not-zip-safe +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/requires.txt +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/top_level.txt +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/setup.cfg +0 -0
- {paperscraper-0.2.5 → paperscraper-0.2.6}/setup.py +0 -0
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
from typing import Dict, List, Union
|
|
2
2
|
|
|
3
|
-
import arxiv
|
|
4
3
|
import pandas as pd
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
|
|
6
|
+
import arxiv
|
|
5
7
|
|
|
6
8
|
from ..utils import dump_papers
|
|
7
9
|
from .utils import get_query_from_keywords
|
|
@@ -10,6 +12,7 @@ arxiv_field_mapper = {
|
|
|
10
12
|
"published": "date",
|
|
11
13
|
"journal_ref": "journal",
|
|
12
14
|
"summary": "abstract",
|
|
15
|
+
"entry_id": "doi",
|
|
13
16
|
}
|
|
14
17
|
|
|
15
18
|
# Authors, date, and journal fields need specific processing
|
|
@@ -17,6 +20,7 @@ process_fields = {
|
|
|
17
20
|
"authors": lambda authors: ", ".join([a.name for a in authors]),
|
|
18
21
|
"date": lambda date: date.strftime("%Y-%m-%d"),
|
|
19
22
|
"journal": lambda j: j if j is not None else "",
|
|
23
|
+
"doi": lambda entry_id: f"10.48550/arXiv.{entry_id.split('/')[-1].split('v')[0]}",
|
|
20
24
|
}
|
|
21
25
|
|
|
22
26
|
|
|
@@ -57,9 +61,9 @@ def get_arxiv_papers(
|
|
|
57
61
|
arxiv_field_mapper.get(key, key), lambda x: x
|
|
58
62
|
)(value)
|
|
59
63
|
for key, value in vars(paper).items()
|
|
60
|
-
if arxiv_field_mapper.get(key, key) in fields
|
|
64
|
+
if arxiv_field_mapper.get(key, key) in fields and key != "doi"
|
|
61
65
|
}
|
|
62
|
-
for paper in results
|
|
66
|
+
for paper in tqdm(results, desc=f"Processing {query}")
|
|
63
67
|
]
|
|
64
68
|
)
|
|
65
69
|
return processed
|
|
@@ -70,7 +74,7 @@ def get_and_dump_arxiv_papers(
|
|
|
70
74
|
output_filepath: str,
|
|
71
75
|
fields: List = ["title", "authors", "date", "abstract", "journal", "doi"],
|
|
72
76
|
*args,
|
|
73
|
-
**kwargs
|
|
77
|
+
**kwargs,
|
|
74
78
|
):
|
|
75
79
|
"""
|
|
76
80
|
Combines get_arxiv_papers and dump_papers.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/chemrxiv_api.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|