paperscraper 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {paperscraper-0.2.5 → paperscraper-0.2.6}/PKG-INFO +1 -1
  2. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/__init__.py +1 -1
  3. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/arxiv.py +8 -4
  4. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/PKG-INFO +1 -1
  5. {paperscraper-0.2.5 → paperscraper-0.2.6}/LICENSE +0 -0
  6. {paperscraper-0.2.5 → paperscraper-0.2.6}/README.md +0 -0
  7. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/__init__.py +0 -0
  8. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/arxiv/utils.py +0 -0
  9. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/__init__.py +0 -0
  10. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/biorxiv.py +0 -0
  11. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/chemrxiv.py +0 -0
  12. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/medrxiv.py +0 -0
  13. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/__init__.py +0 -0
  14. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/__init__.py +0 -0
  15. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/chemrxiv_api.py +0 -0
  16. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/get_dumps/utils/chemrxiv/utils.py +0 -0
  17. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/journal_if.py +0 -0
  18. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/load_dumps.py +0 -0
  19. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pdf.py +0 -0
  20. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/plotting.py +0 -0
  21. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/postprocessing.py +0 -0
  22. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/__init__.py +0 -0
  23. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/pubmed.py +0 -0
  24. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/pubmed/utils.py +0 -0
  25. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/__init__.py +0 -0
  26. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/scholar/scholar.py +0 -0
  27. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/server_dumps/__init__.py +0 -0
  28. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/utils.py +0 -0
  29. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/__init__.py +0 -0
  30. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_api.py +0 -0
  31. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper/xrxiv/xrxiv_query.py +0 -0
  32. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/SOURCES.txt +0 -0
  33. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/dependency_links.txt +0 -0
  34. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/not-zip-safe +0 -0
  35. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/requires.txt +0 -0
  36. {paperscraper-0.2.5 → paperscraper-0.2.6}/paperscraper.egg-info/top_level.txt +0 -0
  37. {paperscraper-0.2.5 → paperscraper-0.2.6}/setup.cfg +0 -0
  38. {paperscraper-0.2.5 → paperscraper-0.2.6}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: paperscraper
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: paperscraper: Package to scrape papers.
5
5
  Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
6
6
  Author: Jannis Born, Matteo Manica
@@ -1,6 +1,6 @@
1
1
  """Initialize the module."""
2
2
  __name__ = "paperscraper"
3
- __version__ = "0.2.5"
3
+ __version__ = "0.2.6"
4
4
 
5
5
  import logging
6
6
  import os
@@ -1,7 +1,9 @@
1
1
  from typing import Dict, List, Union
2
2
 
3
- import arxiv
4
3
  import pandas as pd
4
+ from tqdm import tqdm
5
+
6
+ import arxiv
5
7
 
6
8
  from ..utils import dump_papers
7
9
  from .utils import get_query_from_keywords
@@ -10,6 +12,7 @@ arxiv_field_mapper = {
10
12
  "published": "date",
11
13
  "journal_ref": "journal",
12
14
  "summary": "abstract",
15
+ "entry_id": "doi",
13
16
  }
14
17
 
15
18
  # Authors, date, and journal fields need specific processing
@@ -17,6 +20,7 @@ process_fields = {
17
20
  "authors": lambda authors: ", ".join([a.name for a in authors]),
18
21
  "date": lambda date: date.strftime("%Y-%m-%d"),
19
22
  "journal": lambda j: j if j is not None else "",
23
+ "doi": lambda entry_id: f"10.48550/arXiv.{entry_id.split('/')[-1].split('v')[0]}",
20
24
  }
21
25
 
22
26
 
@@ -57,9 +61,9 @@ def get_arxiv_papers(
57
61
  arxiv_field_mapper.get(key, key), lambda x: x
58
62
  )(value)
59
63
  for key, value in vars(paper).items()
60
- if arxiv_field_mapper.get(key, key) in fields
64
+ if arxiv_field_mapper.get(key, key) in fields and key != "doi"
61
65
  }
62
- for paper in results
66
+ for paper in tqdm(results, desc=f"Processing {query}")
63
67
  ]
64
68
  )
65
69
  return processed
@@ -70,7 +74,7 @@ def get_and_dump_arxiv_papers(
70
74
  output_filepath: str,
71
75
  fields: List = ["title", "authors", "date", "abstract", "journal", "doi"],
72
76
  *args,
73
- **kwargs
77
+ **kwargs,
74
78
  ):
75
79
  """
76
80
  Combines get_arxiv_papers and dump_papers.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: paperscraper
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: paperscraper: Package to scrape papers.
5
5
  Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
6
6
  Author: Jannis Born, Matteo Manica
File without changes
File without changes
File without changes
File without changes