PyPI - paperscraper - Versions diffs - 0.2.8__tar.gz → 0.2.10__tar.gz - Mend

paperscraper 0.2.8tar.gz → 0.2.10tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

{paperscraper-0.2.8 → paperscraper-0.2.10}/PKG-INFO RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: paperscraper
-Version: 0.2.8
+Version: 0.2.10
 Summary: paperscraper: Package to scrape papers.
 Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
 Author: Jannis Born, Matteo Manica
 Author-email: jannis.born@gmx.de, drugilsberg@gmail.com
 License: MIT
-Keywords: Academics,Science,Publication,Search,PubMed,Arxiv,Medrxiv,Biorxiv,Chemrxiv
+Keywords: Academics,Science,Publication,Search,PubMed,Arxiv,Medrxiv,Biorxiv,Chemrxiv,Google Scholar
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -27,6 +27,9 @@ Requires-Dist: seaborn
 Requires-Dist: matplotlib
 Requires-Dist: matplotlib_venn
 Requires-Dist: bs4
+Requires-Dist: impact-factor>=1.1.0
+Requires-Dist: thefuzz
+Requires-Dist: pytest
 [![build](https://github.com/PhosphorylatedRabbits/paperscraper/actions/workflows/build.yml/badge.svg)](https://github.com/PhosphorylatedRabbits/paperscraper/actions/workflows/build.yml)
 [![License:
@@ -179,14 +182,41 @@ get_citations_from_title(title)
 *NOTE*: The scholar endpoint does not require authentication but since it regularly
 prompts with captchas, it's difficult to apply at large scale.
-#### Journal impact factor
+### Journal impact factor
-You can also retrieve the impact factor for all journals indexed by citefactor:
+You can also retrieve the impact factor for all journals:
 ```py
-from paperscraper.journal_if import Impactor
-i = Impactor()
+>>> from paperscraper.impact import Impactor
+>>> i = Impactor()
+>>> i.search("Nat Comms", threshold=85, sort_by='impact')
+[
+    {'journal': 'Nature Communications', 'factor': 17.694, 'score': 94},
+    {'journal': 'Natural Computing', 'factor': 1.504, 'score': 88}
+]
+```
+This performs a fuzzy search with a threshold of 85. `threshold` defaults to 100, in which case an exact search
+is performed. You can also search by journal abbreviation, [E-ISSN](https://portal.issn.org) or [NLM ID](https://portal.issn.org).
+```py
+i.search("Nat Rev Earth Environ") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search("101771060") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search('2662-138X') # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+# Filter results by impact factor
+i.search("Neural network", threshold=85, min_impact=1.5, max_impact=20)
+# [
+#   {'journal': 'IEEE Transactions on Neural Networks and Learning Systems', 'factor': 14.255, 'score': 93},
+#   {'journal': 'NEURAL NETWORKS', 'factor': 9.657, 'score': 91},
+#   {'journal': 'WORK-A Journal of Prevention Assessment & Rehabilitation', 'factor': 1.803, 'score': 86},
+#   {'journal': 'NETWORK-COMPUTATION IN NEURAL SYSTEMS', 'factor': 1.5, 'score': 92}
+# ]
+# Show all fields
+i.search("quantum information", threshold=90, return_all=True)
+# [
+#   {'factor': 10.758, 'jcr': 'Q1', 'journal_abbr': 'npj Quantum Inf', 'eissn': '2056-6387', 'journal': 'npj Quantum Information', 'nlm_id': '101722857', 'issn': '', 'score': 92},
+#   {'factor': 1.577, 'jcr': 'Q3', 'journal_abbr': 'Nation', 'eissn': '0027-8378', 'journal': 'NATION', 'nlm_id': '9877123', 'issn': '0027-8378', 'score': 91}
+# ]
 ```
-Then, `i.journal_to_if` should give you a dictionary wit journal to IF mappings for >9000 journals as of 2014.
 ### Plotting

{paperscraper-0.2.8 → paperscraper-0.2.10}/README.md RENAMED Viewed

@@ -149,14 +149,41 @@ get_citations_from_title(title)
 *NOTE*: The scholar endpoint does not require authentication but since it regularly
 prompts with captchas, it's difficult to apply at large scale.
-#### Journal impact factor
+### Journal impact factor
-You can also retrieve the impact factor for all journals indexed by citefactor:
+You can also retrieve the impact factor for all journals:
 ```py
-from paperscraper.journal_if import Impactor
-i = Impactor()
+>>> from paperscraper.impact import Impactor
+>>> i = Impactor()
+>>> i.search("Nat Comms", threshold=85, sort_by='impact')
+[
+    {'journal': 'Nature Communications', 'factor': 17.694, 'score': 94},
+    {'journal': 'Natural Computing', 'factor': 1.504, 'score': 88}
+]
+```
+This performs a fuzzy search with a threshold of 85. `threshold` defaults to 100, in which case an exact search
+is performed. You can also search by journal abbreviation, [E-ISSN](https://portal.issn.org) or [NLM ID](https://portal.issn.org).
+```py
+i.search("Nat Rev Earth Environ") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search("101771060") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search('2662-138X') # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+# Filter results by impact factor
+i.search("Neural network", threshold=85, min_impact=1.5, max_impact=20)
+# [
+#   {'journal': 'IEEE Transactions on Neural Networks and Learning Systems', 'factor': 14.255, 'score': 93},
+#   {'journal': 'NEURAL NETWORKS', 'factor': 9.657, 'score': 91},
+#   {'journal': 'WORK-A Journal of Prevention Assessment & Rehabilitation', 'factor': 1.803, 'score': 86},
+#   {'journal': 'NETWORK-COMPUTATION IN NEURAL SYSTEMS', 'factor': 1.5, 'score': 92}
+# ]
+# Show all fields
+i.search("quantum information", threshold=90, return_all=True)
+# [
+#   {'factor': 10.758, 'jcr': 'Q1', 'journal_abbr': 'npj Quantum Inf', 'eissn': '2056-6387', 'journal': 'npj Quantum Information', 'nlm_id': '101722857', 'issn': '', 'score': 92},
+#   {'factor': 1.577, 'jcr': 'Q3', 'journal_abbr': 'Nation', 'eissn': '0027-8378', 'journal': 'NATION', 'nlm_id': '9877123', 'issn': '0027-8378', 'score': 91}
+# ]
 ```
-Then, `i.journal_to_if` should give you a dictionary wit journal to IF mappings for >9000 journals as of 2014.
 ### Plotting

{paperscraper-0.2.8 → paperscraper-0.2.10}/paperscraper/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """Initialize the module."""
 __name__ = "paperscraper"
-__version__ = "0.2.8"
+__version__ = "0.2.10"
 import logging
 import os

paperscraper-0.2.10/paperscraper/impact.py ADDED Viewed

@@ -0,0 +1,111 @@
+import logging
+from typing import Any, Dict, List, Optional
+import pandas as pd
+from impact_factor.core import Factor
+from thefuzz import fuzz
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+# NOTE(review): logging.disable(logging.INFO) acts on the whole process, not just
+# this logger: it suppresses every record of severity INFO and below, which also
+# silences the `logger.info` call in Impactor.__init__. Confirm that this
+# import-time side effect is intended.
+logging.disable(logging.INFO)
+class Impactor:
+    """
+    Look up journal impact factors by exact or fuzzy matching.
+    The journal records are loaded once at construction time into a DataFrame
+    (`self.metadata`) in which every column is stored as a string.
+    """
+    def __init__(self):
+        """
+        Initialize the Impactor class with an instance of the Factor class.
+        This allows access to the database of journal impact factors.
+        """
+        self.fa = Factor()
+        # Presumably "%" acts as a match-all wildcard in the underlying database,
+        # so this fetches every journal record -- confirm against impact_factor.
+        self.all_journals = self.fa.search("%")
+        # dtype=str: all columns (including `factor`) are strings here; `search`
+        # converts `factor` to a numeric column before sorting or filtering on it.
+        self.metadata = pd.DataFrame(self.all_journals, dtype=str)
+        logger.info(f"Loaded metadata for {len(self.metadata)} journals")
+    def search(
+        self,
+        query: str,
+        threshold: int = 100,
+        sort_by: Optional[str] = None,
+        min_impact: float = 0.0,
+        max_impact: float = float("inf"),
+        return_all: bool = False,
+    ) -> List[Dict[str, Any]]:
+        """
+        Search for journals matching the given query with an optional fuzziness
+            level and sorting.
+        Args:
+            query: The journal name or abbreviation to search for.
+            threshold: The threshold for fuzzy matching. If set to 100, exact matching
+                is performed. If set below 100, fuzzy matching is used. Defaults to 100.
+            sort_by: Criterion for sorting results, one of 'impact', 'journal' and 'score'.
+                'factor' is accepted as an alias of 'impact'. Sorting by impact and by
+                score is descending, sorting by journal is ascending. Any other value
+                (including None) leaves the results in database order.
+            min_impact: Minimum impact factor for journals to be considered, defaults to 0.
+            max_impact: Maximum impact factor for journals to be considered, defaults to infinity.
+            return_all: If True, returns all columns of the DataFrame for each match.
+        Returns:
+            List[dict]: A list of dictionaries containing the journal information.
+        Raises:
+            TypeError: If `query` is not a str or `threshold` is not an int.
+            ValueError: If `threshold` is outside the range [0, 100].
+        """
+        # Validation of parameters
+        if not isinstance(query, str) or not isinstance(threshold, int):
+            raise TypeError(
+                f"Query must be a str and threshold must be an int, not {type(query)} and {type(threshold)}"
+            )
+        if threshold < 0 or threshold > 100:
+            raise ValueError(
+                f"Fuzziness threshold must be between 0 and 100, not {threshold}"
+            )
+        if str.isdigit(query) and threshold >= 100:
+            # When querying with NLM ID, exact matching does not work since impact_factor
+            # strips off leading zeros, so we use fuzzy matching instead
+            threshold = 99
+        # Fuzziness score of a row: best partial match of the query against any field
+        def calculate_fuzziness_score(row):
+            return max(fuzz.partial_ratio(query, str(value)) for value in row.values)
+        # Search with or without fuzzy matching
+        if threshold >= 100:
+            needle = query.lower()
+            is_match = self.metadata.apply(
+                lambda x: needle in x.astype(str).str.lower().values, axis=1
+            )
+            matched_df = self.metadata[is_match].copy()
+            # Exact matches get a default score of 100
+            matched_df["score"] = 100
+        else:
+            # Score every row once and reuse the result for both the filter and
+            # the `score` column instead of scoring the matched rows a second time.
+            scores = self.metadata.apply(calculate_fuzziness_score, axis=1)
+            is_match = scores >= threshold
+            matched_df = self.metadata[is_match].copy()
+            matched_df["score"] = scores[is_match]
+        # Convert before sorting: the metadata holds strings, and sorting strings
+        # would rank "9.5" above "10.2".
+        matched_df["factor"] = pd.to_numeric(matched_df["factor"])
+        matched_df = matched_df[
+            (matched_df["factor"] >= min_impact) & (matched_df["factor"] <= max_impact)
+        ]
+        # Sorting based on the specified criterion
+        if sort_by == "score":
+            matched_df = matched_df.sort_values(by="score", ascending=False)
+        elif sort_by == "journal":
+            matched_df = matched_df.sort_values(by="journal")
+        elif sort_by in ("impact", "factor"):
+            matched_df = matched_df.sort_values(by="factor", ascending=False)
+        # Prepare the final result
+        results = [
+            row.to_dict()
+            if return_all
+            else {
+                "journal": row["journal"],
+                "factor": row["factor"],
+                "score": row["score"],
+            }
+            for _, row in matched_df.iterrows()
+        ]
+        return results

paperscraper-0.2.10/paperscraper/tests/__init__.py ADDED Viewed

File without changes

paperscraper-0.2.10/paperscraper/tests/test_impactor.py ADDED Viewed

@@ -0,0 +1,69 @@
+import logging
+import pytest
+from paperscraper.impact import Impactor
+logging.disable(logging.INFO)
+class TestImpactor:
+    """Tests for `Impactor.search` run against the journal database it loads."""
+    # Class scope: build the Impactor (which loads the journal database) once and
+    # share it across the tests, none of which modify it.
+    @pytest.fixture(scope="class")
+    def impactor(self):
+        return Impactor()
+    def test_basic_search(self, impactor: Impactor):
+        """A fuzzy search returns results carrying the three basic fields."""
+        results = impactor.search("Nat Comm", threshold=99, sort_by="score")
+        assert len(results) > 0  # Ensure we get some results
+        assert all(
+            "journal" in r and "factor" in r and "score" in r for r in results
+        )  # Basic fields are present
+    def test_fuzzy_search(self, impactor: Impactor):
+        """A partial journal name finds the full journal name."""
+        results = impactor.search("Nat Comm", threshold=99)
+        assert any(
+            r["journal"] == "Nature Communications" for r in results
+        )  # Check for a specific journal
+    def test_sort_by_score(self, impactor: Impactor):
+        """Results sorted by score come back in descending score order."""
+        results = impactor.search("nature chem", threshold=80, sort_by="score")
+        scores = [r["score"] for r in results]
+        assert scores == sorted(
+            scores, reverse=True
+        )  # Ensure results are sorted by score
+    def test_impact_factor_filtering(self, impactor: Impactor):
+        """`min_impact` removes journals below the requested impact factor."""
+        results = impactor.search("Quantum information", threshold=70, min_impact=8)
+        assert all(
+            8 <= r["factor"] for r in results
+        )  # Check if all results have a factor >= 8
+    def test_return_all_fields(self, impactor: Impactor):
+        """`return_all=True` yields more than the three basic fields."""
+        results = impactor.search("nature chem", return_all=True)
+        assert all(
+            len(r) > 3 for r in results
+        )  # Check if more than the basic fields are returned
+    def test_quantum_information_search(self, impactor: Impactor):
+        """Filtered search sorted by impact factor matches the pinned results."""
+        # NOTE(review): these values are pinned to one snapshot of the journal
+        # database; presumably they need updating when impact_factor ships new data.
+        expected_results = [
+            {"journal": "InfoMat", "factor": 24.798, "score": 71},
+            {"journal": "Information Fusion", "factor": 17.564, "score": 71},
+            {"journal": "npj Quantum Information", "factor": 10.758, "score": 95},
+        ]
+        # 'impact' is the documented sort key; the expected list above is in
+        # descending impact-factor order.
+        results = impactor.search(
+            "Quantum information", threshold=70, sort_by="impact", min_impact=8
+        )
+        # Ensure that the results match the expected results
+        assert len(results) == len(expected_results), "Number of results does not match"
+        for expected, actual in zip(expected_results, results):
+            assert (
+                expected["journal"] == actual["journal"]
+            ), f"Journal name does not match for {expected['journal']}"
+            assert (
+                abs(expected["factor"] - actual["factor"]) < 0.001
+            ), f"Impact factor does not match for {expected['journal']}"
+            assert (
+                expected["score"] == actual["score"]
+            ), f"Score does not match for {expected['journal']}"

{paperscraper-0.2.8 → paperscraper-0.2.10}/paperscraper.egg-info/PKG-INFO RENAMED Viewed

@@ -1,12 +1,12 @@
 Metadata-Version: 2.1
 Name: paperscraper
-Version: 0.2.8
+Version: 0.2.10
 Summary: paperscraper: Package to scrape papers.
 Home-page: https://github.com/PhosphorylatedRabbits/paperscraper
 Author: Jannis Born, Matteo Manica
 Author-email: jannis.born@gmx.de, drugilsberg@gmail.com
 License: MIT
-Keywords: Academics,Science,Publication,Search,PubMed,Arxiv,Medrxiv,Biorxiv,Chemrxiv
+Keywords: Academics,Science,Publication,Search,PubMed,Arxiv,Medrxiv,Biorxiv,Chemrxiv,Google Scholar
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: Intended Audience :: Science/Research
@@ -27,6 +27,9 @@ Requires-Dist: seaborn
 Requires-Dist: matplotlib
 Requires-Dist: matplotlib_venn
 Requires-Dist: bs4
+Requires-Dist: impact-factor>=1.1.0
+Requires-Dist: thefuzz
+Requires-Dist: pytest
 [![build](https://github.com/PhosphorylatedRabbits/paperscraper/actions/workflows/build.yml/badge.svg)](https://github.com/PhosphorylatedRabbits/paperscraper/actions/workflows/build.yml)
 [![License:
@@ -179,14 +182,41 @@ get_citations_from_title(title)
 *NOTE*: The scholar endpoint does not require authentication but since it regularly
 prompts with captchas, it's difficult to apply at large scale.
-#### Journal impact factor
+### Journal impact factor
-You can also retrieve the impact factor for all journals indexed by citefactor:
+You can also retrieve the impact factor for all journals:
 ```py
-from paperscraper.journal_if import Impactor
-i = Impactor()
+>>> from paperscraper.impact import Impactor
+>>> i = Impactor()
+>>> i.search("Nat Comms", threshold=85, sort_by='impact')
+[
+    {'journal': 'Nature Communications', 'factor': 17.694, 'score': 94},
+    {'journal': 'Natural Computing', 'factor': 1.504, 'score': 88}
+]
+```
+This performs a fuzzy search with a threshold of 85. `threshold` defaults to 100, in which case an exact search
+is performed. You can also search by journal abbreviation, [E-ISSN](https://portal.issn.org) or [NLM ID](https://portal.issn.org).
+```py
+i.search("Nat Rev Earth Environ") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search("101771060") # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+i.search('2662-138X') # [{'journal': 'Nature Reviews Earth & Environment', 'factor': 37.214, 'score': 100}]
+# Filter results by impact factor
+i.search("Neural network", threshold=85, min_impact=1.5, max_impact=20)
+# [
+#   {'journal': 'IEEE Transactions on Neural Networks and Learning Systems', 'factor': 14.255, 'score': 93},
+#   {'journal': 'NEURAL NETWORKS', 'factor': 9.657, 'score': 91},
+#   {'journal': 'WORK-A Journal of Prevention Assessment & Rehabilitation', 'factor': 1.803, 'score': 86},
+#   {'journal': 'NETWORK-COMPUTATION IN NEURAL SYSTEMS', 'factor': 1.5, 'score': 92}
+# ]
+# Show all fields
+i.search("quantum information", threshold=90, return_all=True)
+# [
+#   {'factor': 10.758, 'jcr': 'Q1', 'journal_abbr': 'npj Quantum Inf', 'eissn': '2056-6387', 'journal': 'npj Quantum Information', 'nlm_id': '101722857', 'issn': '', 'score': 92},
+#   {'factor': 1.577, 'jcr': 'Q3', 'journal_abbr': 'Nation', 'eissn': '0027-8378', 'journal': 'NATION', 'nlm_id': '9877123', 'issn': '0027-8378', 'score': 91}
+# ]
 ```
-Then, `i.journal_to_if` should give you a dictionary wit journal to IF mappings for >9000 journals as of 2014.
 ### Plotting

{paperscraper-0.2.8 → paperscraper-0.2.10}/paperscraper.egg-info/SOURCES.txt RENAMED Viewed

@@ -2,7 +2,7 @@ LICENSE
 README.md
 setup.py
 paperscraper/__init__.py
-paperscraper/journal_if.py
+paperscraper/impact.py
 paperscraper/load_dumps.py
 paperscraper/pdf.py
 paperscraper/plotting.py
@@ -31,6 +31,8 @@ paperscraper/pubmed/utils.py
 paperscraper/scholar/__init__.py
 paperscraper/scholar/scholar.py
 paperscraper/server_dumps/__init__.py
+paperscraper/tests/__init__.py
+paperscraper/tests/test_impactor.py
 paperscraper/xrxiv/__init__.py
 paperscraper/xrxiv/xrxiv_api.py
 paperscraper/xrxiv/xrxiv_query.py

{paperscraper-0.2.8 → paperscraper-0.2.10}/paperscraper.egg-info/requires.txt RENAMED Viewed

@@ -8,3 +8,6 @@ seaborn
 matplotlib
 matplotlib_venn
 bs4
+impact-factor>=1.1.0
+thefuzz
+pytest

{paperscraper-0.2.8 → paperscraper-0.2.10}/setup.py RENAMED Viewed

@@ -1,10 +1,10 @@
 """Install package."""
-import os
-from setuptools import setup
-from setuptools import find_packages
 import io
+import os
 import re
+from setuptools import find_packages, setup
 __version__ = re.search(
     r'__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
     io.open("paperscraper/__init__.py", encoding="utf_8_sig").read(),
@@ -36,6 +36,9 @@ setup(
         "matplotlib",
         "matplotlib_venn",
         "bs4",
+        "impact-factor>=1.1.0",
+        "thefuzz",
+        "pytest",
     ],
     keywords=[
         "Academics",
@@ -47,6 +50,7 @@ setup(
         "Medrxiv",
         "Biorxiv",
         "Chemrxiv",
+        "Google Scholar",
     ],
     packages=find_packages("."),
     package_data={"paperscraper.server_dumps": ["*"]},

paperscraper-0.2.8/paperscraper/journal_if.py DELETED Viewed

@@ -1,155 +0,0 @@
-"""
-Class to fetch the impact factor of all citefactor-indexed journals.
-Limitation: Fetches the 2014 IFs.
-Adapted from: https://github.com/andrew-hill/impactor/blob/master/impactor.py
-Available via MIT License.
-Adaptions:
-- Converting code from Python2 to Python3.
-- Fetching IFs from *all* journals not just from journals starting with "A".
-"""
-import logging
-import pickle
-import re
-import string
-from urllib.request import urlopen
-# http://www.crummy.com/software/BeautifulSoup/
-from bs4 import BeautifulSoup
-class Impactor(object):
-    """
-    Class to fetch the impact factor of all citefactor-indexed journals as of 2014.
-    """
-    BASE_URL_PREFIX = r"http://www.citefactor.org/journal-impact-factor-list-"
-    BASE_URL_SUFFIX = r".html"
-    URL_REGEX_PREFIX = r"http://www\.citefactor\.org/journal-impact-factor-list-"
-    URL_REGEX_SUFFIX = r"_?[A-Z]?\.html"
-    def __init__(self, journal_db_file=None, year=2014):
-        logging.debug("journal_db_file={}, year={}".format(journal_db_file, year))
-        self.journal_data = None
-        self.journal_db_file = journal_db_file
-        self.matches = set()
-        self.year = year
-        assert year in (2014,), "Can only handle 2014 at the moment."
-        self.base_url = self.BASE_URL_PREFIX + str(year) + self.BASE_URL_SUFFIX
-        self.url_regex = self.URL_REGEX_PREFIX + str(year) + self.URL_REGEX_SUFFIX
-        self.re = re.compile(self.url_regex)
-        self.load()
-        self.save()
-        self.create_if_dict()
-    def match(self, search_terms):
-        # If no terms specified, show all entries
-        if search_terms is None or len(search_terms) == 0:
-            for j in self.journal_data.values():
-                self.matches.add(j["ISSN"])
-        # Otherwise do search
-        issn_re = re.compile(r"\d{4}-\d{4}")
-        for s in search_terms:
-            if issn_re.match(s):
-                self.matches.add(s)
-            else:
-                for j in self.journal_data.values():
-                    if j["JOURNAL"].lower().find(s.lower()) >= 0:
-                        self.matches.add(j["ISSN"])
-    def load(self):
-        # Try to load from file
-        if self.journal_db_file is not None:
-            try:
-                with open(self.journal_db_file, "rb") as f:
-                    self.journal_data = pickle.load(f)
-                    logging.debug(
-                        "loaded journals from {}".format(self.journal_db_file)
-                    )
-            except Exception:
-                pass
-        # If cannot load from file, load from URL
-        if self.journal_data is None:
-            logging.info("Fetching database from citefactor.org...")
-            self.journal_data = self.get_all_journal_data()
-    def save(self):
-        if self.journal_db_file is not None:
-            try:
-                with open(self.journal_db_file, "wb") as f:
-                    pickle.dump(self.journal_data, f, -1)
-                    logging.debug("saved journals to {}".format(self.journal_db_file))
-            except Exception:
-                pass
-    def get_all_urls(self):
-        main_page_content = urlopen(self.base_url).read()
-        soup = BeautifulSoup(main_page_content)
-        soup.prettify()  # necessary?
-        return [
-            self.base_url,
-        ] + [anchor["href"] for anchor in soup.find_all("a", href=self.re)]
-    def get_journal_table(self, url):
-        content = urlopen(url).read()
-        soup = BeautifulSoup(content)
-        soup.prettify()  # necessary?
-        t = soup.table
-        caption_re = re.compile(
-            r"^Impact Factor " + str(self.year)
-        )  # works for Year==2015 only
-        while t is not None:
-            if (
-                t.caption is None
-                or t.caption.string is None
-                or caption_re.match(t.caption.string) is None
-            ):
-                t = t.find_next()
-                continue
-            return t
-    def get_table_headers(self, table):
-        return [str(x.string) for x in table.tr.find_all("td")]
-    def get_journal_data(self, table):
-        headers = self.get_table_headers(table)
-        journals = dict()
-        for row in table.find_all("tr")[1:]:
-            cells = row.find_all("td")
-            j = dict(zip(headers, [str(x.string) for x in cells]))
-            # logging.debug('importing: {}'.format(j))
-            journals[j["ISSN"]] = j
-        return journals
-    def get_all_journal_data(self):
-        journals = dict()
-        for url in self.get_all_urls():
-            for page in string.ascii_uppercase:
-                page = "0-A" if page == "A" else page
-                url_page = url.split("2014")[0] + "2014_" + page + url.split("2014")[1]
-                table = self.get_journal_table(url_page)
-                journals.update(self.get_journal_data(table))
-        logging.info(
-            "imported {} journal entries from citefactor.org".format(len(journals))
-        )
-        return journals
-    def create_if_dict(self):
-        """
-        Creates a dictionary with journal names as key (lowercase) and impact factors
-        as values.
-        """
-        stringparse = (
-            lambda x: str(x).strip().lower().replace("\\", "_").replace(" ", "_")
-        )
-        self.journal_to_if = dict(
-            (stringparse(value["JOURNAL"]), value["2013/2014"])
-            for key, value in self.journal_data.items()
-        )