hdx-python-scraper 2.3.0__tar.gz → 2.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/pre-commit-config.yaml +1 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/PKG-INFO +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/main.md +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/pyproject.toml +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/requirements.txt +14 -14
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/_version.py +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/base_scraper.py +17 -7
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/scraper.py +13 -13
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/timeseries.py +1 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/runner.py +35 -15
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/__init__.py +5 -5
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/reader.py +18 -7
- hdx_python_scraper-2.3.2/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json +1 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_closures.py +2 -1
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_readers.py +2 -2
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_runner.py +66 -34
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_custom.py +94 -50
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/black.toml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/coveragerc +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/pytest.ini +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.config/ruff.toml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.github/workflows/publish.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.github/workflows/run-python-tests.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/.gitignore +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/CONTRIBUTING.md +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/LICENSE +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/README.md +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/.readthedocs.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/pydoc-markdown.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/aggregator.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/resource_downloader.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/rowparser.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/base.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/excelfile.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/googlesheets.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/outputs/json.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/fallbacks.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/region_lookup.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/sources.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/writer.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/config/project_configuration.yaml +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/access_2pacx-1vrszjzuyvt9i-mkrq2hbxrul2lx2vihkthqm-lae8nyhqty70zqtcufs3pxbhzgat1l2bkoa4-daoap-pub-gid-574237756-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/additional-json.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/altworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cbpf-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cbpf2-allocations-and-contributions.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf2-covid-19-allocations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf2_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/cerf_global_download-full-pfmb-allocations.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/covax_2pacx-1vtvzu79pptfaa2syevoqfyrrjy63djwitqu0ffbxiqczoun9k9timwmrvfgg1rbsnlmgyugzseiaye2-pub-gid-992438980-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/covidtests_data-owid-covid-data.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_closures_broken.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_closures_school_closures.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/education_enrolment_download-countries-enrollment-data-uis-feb-22.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ethiopia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/fallbacks.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/gam_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/gam_other_download-unicef-who-wb-global-expanded-databases-severe-wasting.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/global-school-closures-covid19.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/hno_2017_sahel_nutrition.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/hno_2017_sahel_people_in_need.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idmc-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_download-displacement-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_override_population-widget-id-264111-geo-id-693-population-group-54074999.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/idps_somalia_som_unhcr_prmn_displacement_dataset.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ipc_somalia_som_food_insecurity_oct_dec2022_projection.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/kenya_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_eth_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-2015311116-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_ken_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-1275038715-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/key_figures_som_2pacx-1vrppqx8jtkkkrckmzfncmmtfecvcpkbp9pdhs1sqtuyacmbsx8tlaxpgblfce-lcehukregguxja-4s-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ourworldindata_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ourworldindata_other_data-tagger-match-all-on-tagger-01-header-location-tagger-01-tag.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/oxcgrt_oxcgrt_csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/oxford-covid-19-government-response-tracker.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/population_other_indicator-sp-pop-downloadformat-excel-dataformat-list-totl.xls +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/regions_tbl_regcov_2020_ocha.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/resource_downloader_xlsx_ukr_border_crossings_090622.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sadd-countries-to-include.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sadd_covid-data-dataset-fullvars-extype-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/sahel-humanitarian-needs-overview.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-acute-food-insecurity-country-data.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-drought-related-key-figures.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-internally-displaced-persons-idps.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia-pin-targeted-reached-by-location-and-cluster.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/somalia_drought_affected_targeted_reached_by_cluster.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/timeseries_casualties_2pacx-1vqidedbzz0ehrc0b4fswip14r7mdtu1mpmwakuxupelsah2awcurkgalfduhjvyjul8vzzat3r1b5qg-pub-gid-0-single-true-output-csv.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/total-covid-19-tests-performed-by-country.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ukraine-border-crossings.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/ukraine-who-does-what-where-3w.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/unocha-office-locations.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national2_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national3_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/who_national_who-covid-19-global-data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/whowhatwhere_afg_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/input/whowhatwhere_notags_3w_data.csv +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_output.xlsx +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_all.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_other.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/fixtures/test_scraper_population.json +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/__init__.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/affected_targeted_reached.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/conftest.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_enrolment.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_output.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_regionlookup.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_aggregation.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_appenddata.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_global.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_multipleurls.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_national.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_regionaltoplevel.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_resource_downloaders.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_subnational.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_scrapers_timeseries.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_sources.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_utils.py +0 -0
- {hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/unhcr_myanmar_idps.py +0 -0
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hdx-python-scraper
-Version: 2.3.0
+Version: 2.3.2
 Summary: HDX Python scraper utilities to assemble data from multiple sources
 Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
 Author-email: Michael Rans <rans@email.com>
@@ -26,7 +26,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Requires-Dist: gspread
-Requires-Dist: hdx-python-api>=6.
+Requires-Dist: hdx-python-api>=6.2.0
 Requires-Dist: hdx-python-country>=3.6.3
 Requires-Dist: regex
 Provides-Extra: dev
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/documentation/main.md
RENAMED

@@ -438,7 +438,7 @@ configured:

 The economicindex configurable scraper reads the dataset
 “covid-19-economic-exposure-index” on HDX, taking from it dataset source,
-reference period and using the url of the dataset in HDX as the source url. (In HDX data
+time period and using the url of the dataset in HDX as the source url. (In HDX data
 explorers, these are used by the DATA links.) The scraper framework finds the first
 resource that is of format `xlsx`, reads the “economic exposure” sheet and looks for the
 headers in row 1 (by default). Note that it is possible to specify a specific resource
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/requirements.txt
RENAMED

@@ -6,7 +6,7 @@
 #
 annotated-types==0.6.0
     # via pydantic
-attrs==23.
+attrs==23.2.0
     # via
     #   frictionless
     #   jsonlines
@@ -29,13 +29,13 @@ click==8.1.7
     # via typer
 colorama==0.4.6
     # via typer
-coverage[toml]==7.
+coverage[toml]==7.4.0
     # via pytest-cov
 cryptography==41.0.7
     # via pyopenssl
 defopt==6.4.0
     # via hdx-python-api
-distlib==0.3.
+distlib==0.3.8
     # via virtualenv
 dnspython==2.4.2
     # via email-validator
@@ -57,17 +57,17 @@ google-auth==2.25.2
     # via
     #   google-auth-oauthlib
     #   gspread
-google-auth-oauthlib==1.
+google-auth-oauthlib==1.2.0
     # via gspread
-gspread==5.12.
+gspread==5.12.4
     # via hdx-python-scraper (pyproject.toml)
-hdx-python-api==6.
+hdx-python-api==6.2.0
     # via hdx-python-scraper (pyproject.toml)
 hdx-python-country==3.6.3
     # via
     #   hdx-python-api
     #   hdx-python-scraper (pyproject.toml)
-hdx-python-utilities==3.6.
+hdx-python-utilities==3.6.3
     # via
     #   hdx-python-api
     #   hdx-python-country
@@ -119,7 +119,7 @@ nodeenv==1.8.0
     # via pre-commit
 num2words==0.5.13
     # via quantulum3
-numpy==1.26.
+numpy==1.26.3
     # via pandas
 oauthlib==3.2.2
     # via requests-oauthlib
@@ -153,11 +153,11 @@ pyasn1-modules==0.3.0
     # via google-auth
 pycparser==2.21
     # via cffi
-pydantic==2.5.
+pydantic==2.5.3
     # via
     #   frictionless
     #   inflect
-pydantic-core==2.14.
+pydantic-core==2.14.6
     # via pydantic
 pygments==2.17.2
     # via rich
@@ -169,7 +169,7 @@ pyphonetics==0.5.3
     # via hdx-python-country
 pyrsistent==0.20.0
     # via jsonschema
-pytest==7.4.
+pytest==7.4.4
     # via
     #   hdx-python-scraper (pyproject.toml)
     #   pytest-cov
@@ -198,7 +198,7 @@ quantulum3==0.9.0
     # via hdx-python-api
 ratelimit==2.2.1
     # via hdx-python-utilities
-regex==2023.
+regex==2023.12.25
     # via hdx-python-scraper (pyproject.toml)
 requests==2.31.0
     # via
@@ -238,7 +238,7 @@ sphinxcontrib-napoleon==0.7
     # via defopt
 stringcase==1.2.0
     # via frictionless
-structlog==23.
+structlog==23.3.0
     # via libhxl
 tableschema-to-template==0.0.13
     # via hdx-python-utilities
@@ -255,7 +255,7 @@ typing-extensions==4.9.0
     #   pydantic
     #   pydantic-core
     #   typer
-tzdata==2023.
+tzdata==2023.4
     # via pandas
 unidecode==1.3.7
     # via
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/base_scraper.py
RENAMED

@@ -16,6 +16,7 @@ class BaseScraper(ABC):
         datasetinfo (Dict): Information about dataset
         headers (Dict[str, Tuple]): Headers to be output at each level_name
         source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).
+        reader (str): Reader to use. Defaults to "" (datasetinfo reader falling back on name).
     """

     population_lookup = {}
@@ -26,15 +27,20 @@ class BaseScraper(ABC):
         datasetinfo: Dict,
         headers: Dict[str, Tuple],
         source_configuration: Dict = {},
+        reader: str = "",
     ) -> None:
-        self.setup(name, headers, source_configuration)
+        self.name = name
+        if reader:
+            self.reader = reader
+        else:
+            self.reader = datasetinfo.get("reader", name)
+        self.setup(headers, source_configuration)
         self.datasetinfo = deepcopy(datasetinfo)
         self.errors_on_exit = None
         self.can_fallback = True

     def setup(
         self,
-        name: str,
         headers: Dict[str, Tuple],
         source_configuration: Dict = {},
     ) -> None:
@@ -42,14 +48,12 @@ class BaseScraper(ABC):
         {"national": (("School Closure",), ("#impact+type",)), ...},

         Args:
-            name (str): Name of scraper
             headers (Dict[str, Tuple]): Headers to be output at each level_name
             source_configuration (Dict): Configuration for sources. Defaults to empty dict (use defaults).

         Returns:
             None
         """
-        self.name = name
         self.headers = headers
         self.initialise_values_sources(source_configuration)
         self.has_run = False
@@ -92,7 +96,7 @@ class BaseScraper(ABC):
             None
         """
         if not name:
-            name = self.name
+            name = self.reader
         reader = Read.get_reader(name)
         return reader

@@ -361,10 +365,16 @@ class BaseScraper(ABC):
             return None
         if "is_hxl" in hapi_resource_metadata:
             return hapi_resource_metadata
-        reader = self.get_reader(
+        reader = self.get_reader()
         filename = self.datasetinfo.get("filename")
+        file_prefix = self.datasetinfo.get("file_prefix", self.name)
+        if filename:
+            kwargs = {"filename": filename}
+        else:
+            kwargs = {"file_prefix": file_prefix}
         hxl_info = reader.hxl_info_hapi_resource_metadata(
-            hapi_resource_metadata, filename=filename
+            hapi_resource_metadata,
+            **kwargs,
         )
         is_hxl = False
         if hxl_info:
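The hunks above let a custom scraper name the reader it wants instead of always using a reader keyed on the scraper's own name; resolution order is the explicit reader argument, then a "reader" key in datasetinfo, then the scraper name. A minimal sketch of a subclass using this, assuming readers have already been configured via Read elsewhere; the scraper name, headers and datasetinfo are invented for illustration:

    # Sketch only: "myscraper", the headers and the datasetinfo are made up.
    from hdx.scraper.base_scraper import BaseScraper

    class MyScraper(BaseScraper):
        def __init__(self, datasetinfo: dict):
            super().__init__(
                "myscraper",
                datasetinfo,
                {"national": (("Value",), ("#value",))},
                reader="hdx",  # wins over datasetinfo.get("reader", "myscraper")
            )

        def run(self) -> None:
            # get_reader() now falls back on self.reader rather than self.name,
            # so this read is routed through the reader registered as "hdx"
            headers, iterator = self.get_reader().read(self.datasetinfo)

        def add_sources(self) -> None:
            pass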
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/scraper.py
RENAMED

@@ -70,6 +70,8 @@ class ConfigurableScraper(BaseScraper):
         errors_on_exit: Optional[ErrorsOnExit] = None,
         **kwargs: Any,
     ):
+        self.name = name
+        self.reader = datasetinfo.get("reader", name)
         self.level = level
         datelevel = datasetinfo.get("date_level")
         if datelevel is None:
@@ -98,11 +100,11 @@ class ConfigurableScraper(BaseScraper):
         use_hxl = self.datasetinfo.get("use_hxl", False)
         if use_hxl:
             try:
-                file_headers, iterator = self.get_iterator(name)
+                file_headers, iterator = self.get_iterator()
                 self.use_hxl(headers, file_headers, iterator)
             except DownloadError:
                 self.can_fallback = False
-        self.setup(name, headers, source_configuration)
+        self.setup(headers, source_configuration)

     @staticmethod
     def get_subsets_from_datasetinfo(datasetinfo: Dict) -> List[Dict]:
@@ -136,20 +138,18 @@ class ConfigurableScraper(BaseScraper):
         ]
         return subsets

-    def get_iterator(self, name: str) -> Tuple[List[str], Iterator[Dict]]:
-        """Get the iterator from the preconfigured reader for
-
-        Args:
-            name (str): Name of scraper
+    def get_iterator(self) -> Tuple[List[str], Iterator[Dict]]:
+        """Get the iterator from the preconfigured reader for this scraper

         Returns:
             Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
         """
-        return self.get_reader(name).read(
-            self.datasetinfo,
-            file_prefix=name,
-            **self.variables,
-        )
+        if (
+            "filename" not in self.datasetinfo
+            and "file_prefix" not in self.datasetinfo
+        ):
+            self.datasetinfo["file_prefix"] = self.name
+        return self.get_reader().read(self.datasetinfo, **self.variables)

     def add_sources(self) -> None:
         """Add source for each HXL hashtag
@@ -466,7 +466,7 @@ class ConfigurableScraper(BaseScraper):
         Returns:
             None
         """
-        file_headers, iterator = self.get_iterator(self.name)
+        file_headers, iterator = self.get_iterator()
         header_to_hxltag = self.use_hxl(None, file_headers, iterator)
         if "source_url" not in self.datasetinfo:
             self.datasetinfo["source_url"] = self.datasetinfo["url"]
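Net effect of the get_iterator rework above: the scraper name is injected once as a default "file_prefix" instead of being threaded through every call, and an explicit "filename" or "file_prefix" in the configuration suppresses that default. A standalone sketch of just that defaulting rule (not the library's actual method; the dict values are hypothetical):

    # Restatement of the 2.3.2 file_prefix defaulting in isolation.
    def default_file_prefix(datasetinfo: dict, name: str) -> dict:
        # The scraper name becomes the download file prefix unless the
        # configuration already pins a filename or its own prefix
        if "filename" not in datasetinfo and "file_prefix" not in datasetinfo:
            datasetinfo["file_prefix"] = name
        return datasetinfo

    assert default_file_prefix({}, "casualties") == {"file_prefix": "casualties"}
    # An explicit filename suppresses the prefix entirely
    assert default_file_prefix({"filename": "data.csv"}, "casualties") == {
        "filename": "data.csv"
    }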
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/configurable/timeseries.py
RENAMED

@@ -50,7 +50,7 @@ class TimeSeries(BaseScraper):
             "output_hxl"
         ]
         rows = [headers, hxltags]
-        file_headers, iterator = self.get_reader(self.name).read(
+        file_headers, iterator = self.get_reader().read(
             self.datasetinfo, file_prefix=self.name
         )
         for inrow in iterator:
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/runner.py
RENAMED

@@ -10,7 +10,7 @@ from .configurable.resource_downloader import ResourceDownloader
 from .configurable.scraper import ConfigurableScraper
 from .configurable.timeseries import TimeSeries
 from .outputs.base import BaseOutput
-from .utilities import get_startend_dates_from_reference_period
+from .utilities import get_startend_dates_from_time_period
 from .utilities.fallbacks import Fallbacks
 from .utilities.reader import Read
 from .utilities.sources import Sources
@@ -29,7 +29,7 @@ class Runner:
         countryiso3s (ListTuple[str]): List of ISO3 country codes to process
         today (datetime): Value to use for today. Defaults to now_utc().
         errors_on_exit (ErrorsOnExit): ErrorsOnExit object that logs errors on exit
-        scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None.
+        scrapers_to_run (Optional[ListTuple[str]]): Scrapers to run. Defaults to None (all scrapers).
     """

     def __init__(
@@ -1061,7 +1061,7 @@ class Runner:
         if dataset_name:
             dataset = reader.read_dataset(dataset_name)
             if date is None:
-                date = get_startend_dates_from_reference_period(
+                date = get_startend_dates_from_time_period(
                     dataset, today=self.today
                 )
         if source_name is None:
@@ -1209,32 +1209,52 @@
         """
         if not names:
             names = self.scrapers.keys()
-        results = {}
+        hapi_results = {}

         def add_results(scraper_level, scrap, levels_used):
-            nonlocal results
+            nonlocal hapi_results

             if scraper_level in levels_used:
                 return
             headers = scrap.headers.get(scraper_level)
             if headers is None:
                 return
+            headings = headers[0]
+            hxltags = headers[1]
             values = scrap.get_values(scraper_level)
             hapi_dataset_metadata = scrap.get_hapi_dataset_metadata()
+            if not hapi_dataset_metadata:
+                return
             hapi_resource_metadata = scrap.get_hapi_resource_metadata()
+            if not hapi_resource_metadata:
+                return
             dataset_id = hapi_dataset_metadata["hdx_id"]
-            hapi_metadata = results.get(
+            hapi_metadata = hapi_results.get(
                 dataset_id, copy(hapi_dataset_metadata)
             )
-
-            level_results
-
-
-
-
-
+            results = hapi_metadata.get("results", {})
+            level_results = results.get(scraper_level)
+            if level_results is None:
+                level_results = {
+                    "headers": ([], []),
+                    "values": [],
+                    "hapi_resource_metadata": hapi_resource_metadata,
+                }
+                results[scraper_level] = level_results
+            lev_headings = level_results["headers"][0]
+            lev_hxltags = level_results["headers"][1]
+            lev_values = level_results["values"]
+            for i, hxltag in enumerate(hxltags):
+                if hxltag in lev_hxltags:
+                    index = lev_hxltags.index(hxltag)
+                    lev_values[index].update(values[i])
+                else:
+                    lev_headings.append(headings[i])
+                    lev_hxltags.append(hxltag)
+                    lev_values.append(values[i])
+            hapi_metadata["results"] = results
             levels_used.add(scraper_level)
-            results[dataset_id] = hapi_metadata
+            hapi_results[dataset_id] = hapi_metadata

         for name in names:
             if self.scrapers_to_run and not any(
@@ -1247,4 +1267,4 @@
             lvls_used = set()
             for scrap_level in scraper.headers:
                 add_results(scrap_level, scraper, lvls_used)
-        return results
+        return hapi_results
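The reworked closure above accumulates results per HDX dataset id and then per admin level, merging columns by HXL hashtag when several scrapers feed the same dataset rather than overwriting earlier output. A rough sketch of the returned structure, with every id, heading, hxltag and value invented for illustration:

    # Hypothetical shape of the mapping the method now returns.
    hapi_results = {
        "c3f001fa-0000-0000-0000-000000000000": {  # dataset hdx_id
            # ...the copied hapi dataset metadata fields sit alongside...
            "results": {
                "national": {
                    "headers": (["Population"], ["#population"]),
                    "values": [{"AFG": 38041754}],  # one dict per column
                    "hapi_resource_metadata": {"hdx_id": "11111111-aaaa"},
                },
            },
        },
    }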
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/__init__.py
RENAMED

@@ -57,22 +57,22 @@ def get_rowval(row: Dict, valcol: str) -> Any:
     return result


-def get_startend_dates_from_reference_period(
+def get_startend_dates_from_time_period(
     dataset: Dataset, today: Optional[datetime] = None
 ) -> Optional[Dict]:
-    """Return the reference period in form required for source_date
+    """Return the time period in form required for source_date

     Args:
         dataset (Dataset): Dataset object
         today (Optional[datetime]): Date to use for today. Defaults to None (datetime.utcnow)

     Returns:
-        Optional[Dict]: Reference period in form required for source_date
+        Optional[Dict]: Time period in form required for source_date
     """
     if today is None:
-        date_info = dataset.get_reference_period()
+        date_info = dataset.get_time_period()
     else:
-        date_info = dataset.get_reference_period(today=today)
+        date_info = dataset.get_time_period(today=today)
     startdate = date_info.get("startdate")
     enddate = date_info.get("enddate")
     if enddate is None:
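Only the names change here, tracking the hdx-python-api 6.2.0 rename of "reference period" to "time period"; the helper still returns a dict in the form expected for source_date. A hedged usage sketch ("mydataset" is a placeholder, and Dataset.read_from_hdx needs a configured HDX connection before it returns anything):

    from datetime import datetime, timezone

    from hdx.data.dataset import Dataset
    from hdx.scraper.utilities import get_startend_dates_from_time_period

    dataset = Dataset.read_from_hdx("mydataset")  # placeholder dataset name
    source_date = get_startend_dates_from_time_period(
        dataset, today=datetime(2024, 1, 15, tzinfo=timezone.utc)
    )
    # source_date is then usable directly as datasetinfo["source_date"]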
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/src/hdx/scraper/utilities/reader.py
RENAMED

@@ -8,7 +8,7 @@ import hxl
 from hxl.input import InputOptions, munge_url
 from slugify import slugify

-from . import get_startend_dates_from_reference_period, match_template
+from . import get_startend_dates_from_time_period, match_template
 from .sources import Sources
 from hdx.data.dataset import Dataset
 from hdx.data.resource import Resource
@@ -224,6 +224,13 @@ class Read(Retrieve):
         filename = datasetinfo.get("filename")
         if filename:
             kwargs["filename"] = filename
+        if filename:
+            # remove file_prefix if filename provided
+            kwargs.pop("file_prefix", None)
+        elif "file_prefix" not in kwargs:
+            file_prefix = datasetinfo.get("file_prefix")
+            if file_prefix:
+                kwargs["file_prefix"] = file_prefix
         return self.get_tabular_rows(
             url,
             dict_form=True,
@@ -311,7 +318,7 @@ class Read(Retrieve):
         """
         return self.construct_filename_and_download(
             resource["name"],
-            resource.get_file_type(),
+            resource.get_format(),
             resource["url"],
             **kwargs,
         )
@@ -331,7 +338,7 @@ class Read(Retrieve):
             "title": dataset["title"],
             "hdx_provider_stub": dataset["organization"]["name"],
             "hdx_provider_name": dataset["organization"]["title"],
-            "reference_period": dataset.get_reference_period(today=self.today),
+            "reference_period": dataset.get_time_period(today=self.today),
         }

     @staticmethod
@@ -477,7 +484,7 @@ class Read(Retrieve):
         if "source_date" not in datasetinfo:
             datasetinfo[
                 "source_date"
-            ] = get_startend_dates_from_reference_period(
+            ] = get_startend_dates_from_time_period(
                 dataset, today=self.today
             )
         if "source" not in datasetinfo:
@@ -510,7 +517,7 @@ class Read(Retrieve):
             key = "default_date"
         else:
             key = hxltag
-        source_date[key] = get_startend_dates_from_reference_period(
+        source_date[key] = get_startend_dates_from_time_period(
             dataset, today=self.today
         )
         if source is not None:
@@ -555,10 +562,14 @@ class Read(Retrieve):
             datasetinfo["filename"] = filename
         filename = datasetinfo.get("filename")
         if resource and not filename:
-            # prefix is added later
             filename = self.construct_filename(
-                resource["name"], resource.get_file_type()
+                resource["name"], resource.get_format()
             )
+            file_prefix = kwargs.get("file_prefix")
+            if not file_prefix:
+                file_prefix = datasetinfo.get("file_prefix")
+            if file_prefix:
+                filename = f"{file_prefix}_{filename}"
         datasetinfo["filename"] = filename
         return self.read_tabular(datasetinfo, **kwargs)
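Taken together, the reader changes fix a precedence for naming saved files: an explicit "filename" wins and strips any "file_prefix"; otherwise a prefix from kwargs, falling back on datasetinfo, is prepended to the constructed filename. A condensed standalone restatement of that rule (not the library's actual code path, which is spread across read_tabular and the dataset-reading method above):

    def resolve_saved_name(datasetinfo: dict, kwargs: dict, constructed: str) -> str:
        filename = datasetinfo.get("filename")
        if filename:
            kwargs.pop("file_prefix", None)  # filename beats any prefix
            return filename
        file_prefix = kwargs.get("file_prefix") or datasetinfo.get("file_prefix")
        if file_prefix:
            return f"{file_prefix}_{constructed}"
        return constructed

    assert resolve_saved_name({"filename": "a.csv"}, {"file_prefix": "x"}, "b.csv") == "a.csv"
    assert resolve_saved_name({}, {"file_prefix": "x"}, "b.csv") == "x_b.csv"
    assert resolve_saved_name({}, {}, "b.csv") == "b.csv"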
hdx_python_scraper-2.3.2/tests/fixtures/input/sahel-humanitarian-needs-overview_prefix.json
ADDED

@@ -0,0 +1 @@
+{"owner_org": "ac91832d-2477-4e1f-8520-9a591a7c3d69", "maintainer": "f240651d-52d6-4a3c-b229-b5dd0443d642", "relationships_as_object": [], "package_creator": "nafissah", "private": false, "dataset_date": "[2016-09-01T00:00:00 TO 2016-09-01T23:59:59]", "num_tags": 3, "solr_additions": "{\"countries\": [\"Benin\", \"Burkina Faso\", \"Cameroon\", \"Chad\", \"Gambia\", \"Mali\", \"Niger\", \"Nigeria\", \"Senegal\"]}", "id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "metadata_created": "2017-03-10T09:30:34.278280", "archived": false, "methodology_other": "Countries data aggregation. ", "metadata_modified": "2019-09-11T16:15:43.873688", "title": "Sahel : Humanitarian Needs Overview", "tags": [{"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "humanitarian needs overview - hno", "id": "bfd4300a-b35f-4c3d-a14b-05e0f040ad29", "name": "humanitarian needs overview - hno"}, {"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "hxl", "id": "a0fbb23a-6aad-4ccc-8062-e9ef9f20e5d2", "name": "hxl"}, {"vocabulary_id": "b891512e-9516-4bf5-962a-7a289772a2a1", "state": "active", "display_name": "people in need - pin", "id": "47dd0671-78c8-4359-9f96-5cfe4b86b48c", "name": "people in need - pin"}], "state": "active", "relationships_as_subject": [], "methodology": "Other", "version": null, "is_requestdata_type": false, "creator_user_id": "7711391a-7647-4432-a71b-294e7f901a2c", "type": "dataset", "has_showcases": true, "due_date": "2018-03-10T10:08:37", "dataset_preview": "first_resource", "num_resources": 5, "dataset_source": "Multiple organisations", "subnational": "1", "last_modified": "2017-03-10T10:08:37.690970", "groups": [{"display_name": "Benin", "description": "", "title": "Benin", "image_display_url": "", "id": "ben", "name": "ben"}, {"display_name": "Burkina Faso", "description": "", "title": "Burkina Faso", "image_display_url": "", "id": "bfa", "name": "bfa"}, {"display_name": "Cameroon", "description": "", "title": "Cameroon", "image_display_url": "", "id": "cmr", "name": "cmr"}, {"display_name": "Chad", "description": "", "title": "Chad", "image_display_url": "", "id": "tcd", "name": "tcd"}, {"display_name": "Gambia", "description": "", "title": "Gambia", "image_display_url": "", "id": "gmb", "name": "gmb"}, {"display_name": "Mali", "description": "", "title": "Mali", "image_display_url": "", "id": "mli", "name": "mli"}, {"display_name": "Niger", "description": "", "title": "Niger", "image_display_url": "", "id": "ner", "name": "ner"}, {"display_name": "Nigeria", "description": "", "title": "Nigeria", "image_display_url": "", "id": "nga", "name": "nga"}, {"display_name": "Senegal", "description": "", "title": "Senegal", "image_display_url": "", "id": "sen", "name": "sen"}], "license_id": "cc-by", "has_quickcharts": true, "has_geodata": false, "overdue_date": "2018-05-09T10:08:37", "total_res_downloads": 3083, "qa_completed": true, "name": "sahel-humanitarian-needs-overview", "isopen": true, "url": null, "notes": "This dataset is produced by the United Nations for the Coordination of Humanitarian Affairs (OCHA) in collaboration with humanitarian partners. \r\nIt covers the period from January to December 2017 and was issued on December 2016.", "license_title": "Creative Commons Attribution International", "batch": "7c91002c-9c43-4c56-ac78-02a20ba9575e", "license_url": "http://www.opendefinition.org/licenses/cc-by", "pageviews_last_14_days": 4, "organization": {"description": "OCHA Regional Office for West and Central Africa (ROWCA).\r\n\r\nDans les pays de l'Afrique de l\u2019Ouest et du Centre, l'ins\u00e9curit\u00e9 alimentaire et la malnutrition s\u2019aggravent avec l'impact des catastrophes naturelles, le changement climatique, l\u2019\u00e9volution d\u00e9mographique, l'urbanisation mal g\u00e9r\u00e9e, les \u00e9pid\u00e9mies et les conflits violents.", "title": "OCHA West and Central Africa (ROWCA)", "created": "2014-09-26T15:15:48.616313", "approval_status": "approved", "is_organization": true, "state": "active", "image_url": "", "type": "organization", "id": "ac91832d-2477-4e1f-8520-9a591a7c3d69", "name": "ocha-rowca"}, "data_update_frequency": "365", "is_fresh": false, "update_status": "needs_update", "x_resource_grouping": [], "resources": [{"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3", "size": 1291, "metadata_modified": "2017-03-10T09:43:15.837967", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Nutrition ", "format": "CSV", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "mimetype_inner": null, "url_type": "upload", "originalHash": "1719647142", "mimetype": null, "cache_url": null, "name": "HNO -2017 -Sahel-nutrition.csv", "created": "2017-03-10T09:43:15.837967", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/2527ac5b-66fe-46f0-8b9b-7086d2c4ddd3/download/hno-2017-sahel-nutrition.csv", "tracking_summary[recent]": "0", "last_modified": "2017-03-10T10:08:37.690970", "tracking_summary[total]": "0", "position": 0, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "487e329d-3959-49bf-8e12-2675be7008ae", "size": 2306, "metadata_modified": "2017-03-10T09:46:28.411331", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "state": "active", "hash": "", "description": "HNO 2017 sahel - Lake Chad - Nutrition", "format": "CSV", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "HNO 2017 Sahel-Lake Chad Nutrition as of 25_11_2016.csv", "created": "2017-03-10T09:46:28.411331", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/487e329d-3959-49bf-8e12-2675be7008ae/download/hno-2017-sahel-lake-chad-nutrition-as-of-25_11_2016.csv", "last_modified": "2017-03-10T10:08:37.690970", "position": 1, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "d648d73f-8e66-461f-864f-0d66b19bcfa2", "size": 10348, "metadata_modified": "2017-03-10T10:01:38.716006", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Displacement per country", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "1345568791", "mimetype": null, "cache_url": null, "name": "HNO-2017-Sahel- Displacement_per_country.xlsx", "created": "2017-03-10T10:01:38.716006", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d648d73f-8e66-461f-864f-0d66b19bcfa2/download/hno-2017-sahel-displacement_per_country.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 2, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "d9248be4-7bfb-4a81-a7aa-c035dcb737a2", "size": 10275, "metadata_modified": "2017-03-10T10:03:01.842877", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - People in need", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "2017-Sahel- People in need.xlsx", "created": "2017-03-10T10:03:01.842877", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/d9248be4-7bfb-4a81-a7aa-c035dcb737a2/download/hno-2017-sahel-people-in-need.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 3, "resource_type": "file.upload"}, {"cache_last_updated": null, "package_id": "47f6ef46-500f-421a-9fa2-fefd93facf95", "datastore_active": false, "id": "798b6a50-75da-4c8c-8034-4a4439630a3a", "size": 9430, "metadata_modified": "2017-03-10T10:04:40.479364", "download_url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "state": "active", "hash": "", "description": "HNO 2017 Sahel - Food Insecurity", "format": "XLSX", "hdx_rel_url": "/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "mimetype_inner": null, "url_type": "upload", "originalHash": "97196323", "mimetype": null, "cache_url": null, "name": "HNO-2017-Sahel-Food Insecurity.xlsx", "created": "2017-03-10T10:04:40.479364", "url": "https://data.humdata.org/dataset/47f6ef46-500f-421a-9fa2-fefd93facf95/resource/798b6a50-75da-4c8c-8034-4a4439630a3a/download/hno-2017-sahel-food-insecurity.xlsx", "last_modified": "2017-03-10T10:08:37.690970", "position": 4, "resource_type": "file.upload"}]}
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/education_closures.py
RENAMED

@@ -19,6 +19,7 @@ class EducationClosures(BaseScraper):
                     ("#status+country+closed",),
                 ),
             },
+            reader="hdx",  # Just to test passing a specific reader to use
         )
         self.today = today
         self.countryiso3s = countryiso3s
@@ -37,7 +38,7 @@ class EducationClosures(BaseScraper):

     def run(self) -> None:
         closures_headers, closures_iterator = self.get_reader().read(
-            self.datasetinfo, file_prefix=
+            self.datasetinfo, file_prefix="education_closures"
         )
         closures = self.get_values("national")[0]
         closed_countries = self.get_values("regional")[0]
{hdx_python_scraper-2.3.0 → hdx_python_scraper-2.3.2}/tests/hdx/scraper/test_readers.py
RENAMED

@@ -40,7 +40,7 @@ class TestReaders:
                 "url": "https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596",
             }
         )
-        resource.set_file_type("csv")
+        resource.set_format("csv")
         dataset.add_update_resource(resource)
         return dataset

@@ -105,7 +105,7 @@ class TestReaders:
                 "url": "https://docs.google.com/spreadsheets/d/1NjSI2LaS3SqbgYc0HdD8oIb7lofGtiHgoKKATCpwVdY/edit#gid=1088874596",
             }
         )
-        resource.set_file_type("csv")
+        resource.set_format("csv")
         data = reader.read_hxl_resource(
             resource, file_prefix="whowhatwhere_afg"
         )