PyPI - hdx-python-scraper - Versions diffs - 2.6.2__py3-none-any.whl → 2.6.4__py3-none-any.whl - Mend

hdx-python-scraper 2.6.2__py3-none-any.whl → 2.6.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

hdx/scraper/framework/_version.py CHANGED Viewed

@@ -1,8 +1,13 @@
-# file generated by setuptools_scm
+# file generated by setuptools-scm
 # don't change, don't track in version control
+__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
 TYPE_CHECKING = False
 if TYPE_CHECKING:
-    from typing import Tuple, Union
+    from typing import Tuple
+    from typing import Union
     VERSION_TUPLE = Tuple[Union[int, str], ...]
 else:
     VERSION_TUPLE = object
@@ -12,5 +17,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '2.6.2'
-__version_tuple__ = version_tuple = (2, 6, 2)
+__version__ = version = '2.6.4'
+__version_tuple__ = version_tuple = (2, 6, 4)

hdx/scraper/framework/base_scraper.py CHANGED Viewed

@@ -79,8 +79,7 @@ class BaseScraper(ABC):
              None
         """
         self.values: Dict[str, Tuple] = {
-            level: tuple({} for _ in value[0])
-            for level, value in self.headers.items()
+            level: tuple({} for _ in value[0]) for level, value in self.headers.items()
         }
         self.sources: Dict[str, List] = {level: [] for level in self.headers}
         self.source_configuration = deepcopy(source_configuration)
@@ -137,9 +136,7 @@ class BaseScraper(ABC):
             return
         if self.datasetinfo.get("no_sources", False):
             return
-        should_overwrite_sources = self.datasetinfo.get(
-            "should_overwrite_sources"
-        )
+        should_overwrite_sources = self.datasetinfo.get("should_overwrite_sources")
         if should_overwrite_sources is not None:
             self.source_configuration["should_overwrite_sources"] = (
                 should_overwrite_sources
@@ -209,16 +206,12 @@ class BaseScraper(ABC):
                 )
             for i, hxltag in enumerate(self.headers[level][1]):
-                suffix_attribute = self.source_configuration.get(
-                    "suffix_attribute"
-                )
+                suffix_attribute = self.source_configuration.get("suffix_attribute")
                 if suffix_attribute:
                     add_source(hxltag, suffix_attribute)
                     continue
                 values = self.get_values(level)[i]
-                admin_sources = self.source_configuration.get(
-                    "admin_sources", False
-                )
+                admin_sources = self.source_configuration.get("admin_sources", False)
                 if not admin_sources:
                     raise ValueError("Invalid source configuration!")
                 admin_mapping = self.source_configuration.get("admin_mapping")
@@ -260,9 +253,7 @@ class BaseScraper(ABC):
         """
         if datasetinfo is None:
             datasetinfo = self.datasetinfo
-        date = Sources.get_hxltag_source_date(
-            datasetinfo, hxltag, fallback=True
-        )
+        date = Sources.get_hxltag_source_date(datasetinfo, hxltag, fallback=True)
         if key is None:
             key = self.name
         dict_of_lists_add(

hdx/scraper/framework/outputs/googlesheets.py CHANGED Viewed

@@ -80,9 +80,7 @@ class GoogleSheets(BaseOutput):
                 headers = list(values.columns.values)
                 rows = [headers]
                 if hxltags:
-                    rows.append(
-                        [hxltags.get(header, "") for header in headers]
-                    )
+                    rows.append([hxltags.get(header, "") for header in headers])
                 if limit is not None:
                     values = values.head(limit)
                 df = values.copy(deep=True)

hdx/scraper/framework/outputs/json.py CHANGED Viewed

@@ -219,9 +219,7 @@ class JsonFile(BaseOutput):
                 newjson = self.json.get(key)
                 filters = tabdetails.get("filters", {})
                 hxltags = tabdetails.get("output")
-                if (filters or hxltags or remove) and isinstance(
-                    newjson, list
-                ):
+                if (filters or hxltags or remove) and isinstance(newjson, list):
                     rows = []
                     for row in newjson:
                         ignore_row = False

hdx/scraper/framework/runner.py CHANGED Viewed

@@ -48,9 +48,7 @@ class Runner:
         self.scrapers = {}
         self.scraper_names = []
-    def add_custom(
-        self, scraper: BaseScraper, force_add_to_run: bool = False
-    ) -> str:
+    def add_custom(self, scraper: BaseScraper, force_add_to_run: bool = False) -> str:
         """Add custom scrapers that inherit BaseScraper. If running specific scrapers
         rather than all, and you want to force the inclusion of the scraper in the run
         regardless of the specific scrapers given, the parameter force_add_to_run
@@ -285,9 +283,7 @@ class Runner:
         Returns:
             Optional["Aggregator"]: scraper or None
         """
-        input_headers = self.get_headers(
-            names, [input_level], overrides=overrides
-        )
+        input_headers = self.get_headers(names, [input_level], overrides=overrides)
         input_headers = input_headers.get(input_level)
         if not input_headers:
             return None
@@ -312,9 +308,7 @@ class Runner:
             ) = self.get_values_sourcesinfo_by_header(
                 input_level, names, overrides, True, use_hxl
             )
-            scraper_self.set_input_values_sources(
-                input_values, input_sourcesinfo
-            )
+            scraper_self.set_input_values_sources(input_values, input_sourcesinfo)
         scraper.pre_run = lambda: get_values_sourcesinfo_by_header(scraper)
         return scraper
@@ -469,9 +463,7 @@ class Runner:
         keys = []
         for datasetinfo in configuration:
             keys.append(
-                self.add_resource_downloader(
-                    datasetinfo, folder, force_add_to_run
-                )
+                self.add_resource_downloader(datasetinfo, folder, force_add_to_run)
             )
         return keys
@@ -552,9 +544,7 @@ class Runner:
         for key, value in kwargs.items():
             setattr(scraper, key, value)
-    def add_pre_run(
-        self, name: str, fn: Callable[[BaseScraper], None]
-    ) -> None:
+    def add_pre_run(self, name: str, fn: Callable[[BaseScraper], None]) -> None:
         """Add pre run instance method to scraper instance given scraper name. The
         function should have one parameter. Since it is being added as an instance
         method to the scraper instance, that parameter will be self and hence is of
@@ -570,9 +560,7 @@ class Runner:
         scraper = self.get_scraper_exception(name)
         scraper.pre_run = lambda: fn(scraper)
-    def add_post_run(
-        self, name: str, fn: Callable[[BaseScraper], None]
-    ) -> None:
+    def add_post_run(self, name: str, fn: Callable[[BaseScraper], None]) -> None:
         """Add post run instance method to scraper instance given scraper name. The
         function should have one parameter. Since it is being added as an instance
         method to the scraper instance, that parameter will be self and hence is of
@@ -617,9 +605,7 @@ class Runner:
                         f"Using fallbacks for {scraper.name}! Error: {format_exc()}"
                     )
                 for level in scraper.headers.keys():
-                    values, sources = Fallbacks.get(
-                        level, scraper.headers[level]
-                    )
+                    values, sources = Fallbacks.get(level, scraper.headers[level])
                     scraper.values[level] = values
                     scraper.sources[level] = sources
                 scraper.add_population()
@@ -643,9 +629,7 @@ class Runner:
         Returns:
             bool: Return True if scraper was run, False if not
         """
-        if self.scrapers_to_run and not any(
-            x in name for x in self.scrapers_to_run
-        ):
+        if self.scrapers_to_run and not any(x in name for x in self.scrapers_to_run):
             return False
         logger.info(f"Running {name}")
         return self.run_one(name, force_run)
@@ -728,9 +712,7 @@ class Runner:
             names = self.scrapers.keys()
         results = {}
-        def add_level_results(
-            scraper_level, override_level, scrap, levels_used
-        ):
+        def add_level_results(scraper_level, override_level, scrap, levels_used):
             nonlocal results
             if scraper_level in levels_used:
@@ -802,9 +784,7 @@ class Runner:
             names = self.scrapers.keys()
         results = {}
-        def add_level_results(
-            scraper_level, override_level, scrap, levels_used
-        ):
+        def add_level_results(scraper_level, override_level, scrap, levels_used):
             nonlocal results
             if scraper_level in levels_used:
@@ -830,10 +810,8 @@ class Runner:
             lev_headings = level_results["headers"][0]
             lev_hxltags = level_results["headers"][1]
             lev_values = level_results["values"]
-            scraper_should_overwrite_sources = (
-                scraper.source_configuration.get(
-                    "should_overwrite_sources", should_overwrite_sources
-                )
+            scraper_should_overwrite_sources = scraper.source_configuration.get(
+                "should_overwrite_sources", should_overwrite_sources
             )
             for i, hxltag in enumerate(hxltags):
                 if hxltag in lev_hxltags:
@@ -903,9 +881,7 @@ class Runner:
         Returns:
             List[List]: Rows for a given level
         """
-        results = self.get_results(names, [level], overrides=overrides).get(
-            level
-        )
+        results = self.get_results(names, [level], overrides=overrides).get(level)
         rows = []
         if results:
             all_headers = results["headers"]
@@ -956,9 +932,7 @@ class Runner:
         else:
             main_index = 0
-        def add_level_results(
-            scraper_level, override_level, scrap, levels_used
-        ):
+        def add_level_results(scraper_level, override_level, scrap, levels_used):
             nonlocal values, sourcesinfo
             if scraper_level in levels_used:
@@ -1129,10 +1103,8 @@ class Runner:
                 levels_to_check = levels
             else:
                 levels_to_check = scraper.sources.keys()
-            scraper_should_overwrite_sources = (
-                scraper.source_configuration.get(
-                    "should_overwrite_sources", should_overwrite_sources
-                )
+            scraper_should_overwrite_sources = scraper.source_configuration.get(
+                "should_overwrite_sources", should_overwrite_sources
             )
             for level in levels_to_check:
                 Sources.add_sources_overwrite(
@@ -1145,9 +1117,7 @@ class Runner:
         add_additional_sources()
         return sources
-    def get_source_urls(
-        self, names: Optional[ListTuple[str]] = None
-    ) -> List[str]:
+    def get_source_urls(self, names: Optional[ListTuple[str]] = None) -> List[str]:
         """Get source urls for scrapers limiting to those in names if given.
         Args:
@@ -1260,9 +1230,7 @@ class Runner:
             if not hapi_resource_metadata:
                 return
             dataset_id = hapi_dataset_metadata["hdx_id"]
-            hapi_metadata = hapi_results.get(
-                dataset_id, copy(hapi_dataset_metadata)
-            )
+            hapi_metadata = hapi_results.get(dataset_id, copy(hapi_dataset_metadata))
             results = hapi_metadata.get("results", {})
             level_results = results.get(scraper_level)
             if level_results is None:

hdx/scraper/framework/scrapers/aggregator.py CHANGED Viewed

@@ -115,9 +115,7 @@ class Aggregator(BaseScraper):
         config_headers_or_hxltags = datasetinfo.get("input")
         if config_headers_or_hxltags:
             exists = True
-            for i, config_header_or_hxltag in enumerate(
-                config_headers_or_hxltags
-            ):
+            for i, config_header_or_hxltag in enumerate(config_headers_or_hxltags):
                 try:
                     input_headers[main_index].index(config_header_or_hxltag)
                 except ValueError:
@@ -218,9 +216,7 @@ class Aggregator(BaseScraper):
                 novals = 0
                 for valuestr in valuelist:
                     value = ""
-                    if isinstance(valuestr, int) or isinstance(
-                        valuestr, float
-                    ):
+                    if isinstance(valuestr, int) or isinstance(valuestr, float):
                         value = valuestr
                     else:
                         if valuestr:
@@ -359,9 +355,7 @@ class Aggregator(BaseScraper):
             if "source" not in self.datasetinfo:
                 self.datasetinfo["source"] = ",".join(sourceinfo["source"])
             if "source_url" not in self.datasetinfo:
-                self.datasetinfo["source_url"] = ",".join(
-                    sourceinfo["source_url"]
-                )
+                self.datasetinfo["source_url"] = ",".join(sourceinfo["source_url"])
         if "source" not in self.datasetinfo:
             return
         super().add_sources()

hdx/scraper/framework/scrapers/configurable_scraper.py CHANGED Viewed

@@ -129,9 +129,7 @@ class ConfigurableScraper(BaseScraper):
                     "input_keep": datasetinfo.get("input_keep", []),
                     "input_append": datasetinfo.get("input_append", []),
                     "sum": datasetinfo.get("sum"),
-                    "input_ignore_vals": datasetinfo.get(
-                        "input_ignore_vals", []
-                    ),
+                    "input_ignore_vals": datasetinfo.get("input_ignore_vals", []),
                     "output": datasetinfo.get("output", []),
                     "output_hxl": datasetinfo.get("output_hxl", []),
                 }
@@ -144,10 +142,7 @@ class ConfigurableScraper(BaseScraper):
         Returns:
             Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
         """
-        if (
-            "filename" not in self.datasetinfo
-            and "file_prefix" not in self.datasetinfo
-        ):
+        if "filename" not in self.datasetinfo and "file_prefix" not in self.datasetinfo:
             self.datasetinfo["file_prefix"] = self.name
         return self.get_reader().read(self.datasetinfo, **self.variables)
@@ -162,9 +157,7 @@ class ConfigurableScraper(BaseScraper):
         if not date or use_date_from_date_col:
             date = self.rowparser.get_maxdate()
             if date == 0:
-                raise ValueError(
-                    "No date given in datasetinfo or as a column!"
-                )
+                raise ValueError("No date given in datasetinfo or as a column!")
             if self.rowparser.datetype == "date":
                 if not isinstance(date, datetime):
                     date = parse_date(date)
@@ -361,11 +354,7 @@ class ConfigurableScraper(BaseScraper):
                         else:
                             input_keep_index = -1
                         val = valdicts[j][adm][input_keep_index]
-                        if (
-                            val is None
-                            or val == ""
-                            or val in input_ignore_vals
-                        ):
+                        if val is None or val == "" or val in input_ignore_vals:
                             val = 0
                         else:
                             hasvalues = True
@@ -384,16 +373,12 @@ class ConfigurableScraper(BaseScraper):
                             for bracketed_str in matches.captures("rec"):
                                 if any(bracketed_str in x for x in valcols):
                                     continue
-                                _, hasvalues_t = text_replacement(
-                                    bracketed_str, adm
-                                )
+                                _, hasvalues_t = text_replacement(bracketed_str, adm)
                                 if not hasvalues_t:
                                     hasvalues = False
                                     break
                         if hasvalues:
-                            formula, hasvalues_t = text_replacement(
-                                process_col, adm
-                            )
+                            formula, hasvalues_t = text_replacement(process_col, adm)
                             if hasvalues_t:
                                 formula = formula.replace(
                                     "#population",
@@ -431,20 +416,14 @@ class ConfigurableScraper(BaseScraper):
                                 continue
                             for j, valdict in enumerate(valdicts):
                                 val = valdict[adm][i]
-                                if (
-                                    val is None
-                                    or val == ""
-                                    or val in input_ignore_vals
-                                ):
+                                if val is None or val == "" or val in input_ignore_vals:
                                     continue
                                 newvaldicts[j][adm] = eval(
                                     f"newvaldicts[j].get(adm, 0.0) + {str(valdict[adm][i])}"
                                 )
                     formula = formula.replace("#population", "#pzbgvjh")
                     for i in sorted_len_indices:
-                        formula = formula.replace(
-                            valcols[i], f"newvaldicts[{i}][adm]"
-                        )
+                        formula = formula.replace(valcols[i], f"newvaldicts[{i}][adm]")
                     formula = formula.replace("#pzbgvjh", population_str)
                     for adm in valdicts[0]:
                         try:
@@ -470,14 +449,10 @@ class ConfigurableScraper(BaseScraper):
         header_to_hxltag = self.use_hxl(None, file_headers, iterator)
         if "source_url" not in self.datasetinfo:
             self.datasetinfo["source_url"] = self.datasetinfo["url"]
-        source_date = Sources.standardise_datasetinfo_source_date(
-            self.datasetinfo
-        )
+        source_date = Sources.standardise_datasetinfo_source_date(self.datasetinfo)
         if not source_date or self.datasetinfo.get("force_date_today", False):
             source_date = self.today
-            self.datasetinfo["source_date"] = {
-                "default_date": {"end": source_date}
-            }
+            self.datasetinfo["source_date"] = {"default_date": {"end": source_date}}
         self.rowparser = RowParser(
             self.name,
             self.countryiso3s,

hdx/scraper/framework/scrapers/rowparser.py CHANGED Viewed

@@ -115,9 +115,7 @@ class RowParser:
             self.maxdates = {i: date for i, _ in enumerate(subsets)}
         else:
             if self.datelevel > len(self.admcols):
-                raise ValueError(
-                    "No admin columns specified for required level_type!"
-                )
+                raise ValueError("No admin columns specified for required level_type!")
             self.maxdates = {
                 i: {adm: date for adm in self.adms[self.datelevel]}
                 for i, _ in enumerate(subsets)
@@ -150,9 +148,7 @@ class RowParser:
                         header = hxltag.display_tag
                     else:
                         header = hxltag.header
-                    dict_of_lists_add(
-                        self.filters, header, row.get("#country+code")
-                    )
+                    dict_of_lists_add(self.filters, header, row.get("#country+code"))
     def get_filter_str_for_eval(self, filter: str) -> str:
         """Replace filter string variables with columns in row of data
@@ -229,9 +225,7 @@ class RowParser:
                 newrow[self.header_to_hxltag[header]] = row[header]
             yield newrow
-    def stop_rows(
-        self, iterator: Iterator[Dict]
-    ) -> Generator[Dict, None, None]:
+    def stop_rows(self, iterator: Iterator[Dict]) -> Generator[Dict, None, None]:
         """Stop processing rows after condition met
         Args:

hdx/scraper/framework/scrapers/timeseries.py CHANGED Viewed

@@ -46,9 +46,7 @@ class TimeSeries(BaseScraper):
         datetype = self.datasetinfo["date_type"]
         ignore_future_date = self.datasetinfo.get("ignore_future_date", True)
         headers = [datecol] + self.datasetinfo["output"]
-        hxltags = [self.datasetinfo["date_hxl"]] + self.datasetinfo[
-            "output_hxl"
-        ]
+        hxltags = [self.datasetinfo["date_hxl"]] + self.datasetinfo["output_hxl"]
         rows = [headers, hxltags]
         file_headers, iterator = self.get_reader().read(
             self.datasetinfo, file_prefix=self.name

hdx/scraper/framework/utilities/hapi_admins.py CHANGED Viewed

@@ -31,8 +31,20 @@ def complete_admins(
     warnings = []
     child = None
     adm_level = len(provider_adm_names)
+    def check_unknown_pcode(adm_code: str, pcode: str) -> str:
+        if pcode:
+            warnings.append(f"PCode unknown {adm_code}->{pcode} ({warntxt})")
+            return pcode
+        else:
+            warnings.append(f"PCode unknown {adm_code}->''")
+            return ""
     for i, provider_adm_name in reversed(list(enumerate(provider_adm_names))):
         adm_code = adm_codes[i]
+        parent = admins[i].pcode_to_parent.get(adm_code)
+        if not parent and i > 0:
+            parent = adm_codes[i - 1]
         if not provider_adm_name:
             provider_adm_name = ""
             provider_adm_names[i] = ""
@@ -40,9 +52,6 @@ def complete_admins(
             pcode = admins[i + 1].pcode_to_parent.get(child)
             warntxt = "parent"
         elif provider_adm_name:
-            parent = admins[i].pcode_to_parent.get(adm_code)
-            if not parent and i > 0:
-                parent = adm_codes[i - 1]
             pcode, _ = admins[i].get_pcode(
                 countryiso3,
                 provider_adm_name,
@@ -54,24 +63,23 @@ def complete_admins(
             pcode = None
         if adm_code:
             if adm_code not in admins[i].pcodes:
-                if pcode:
-                    warnings.append(
-                        f"PCode unknown {adm_code}->{pcode} ({warntxt})"
+                if admins[i].looks_like_pcode(adm_code):
+                    adj_adm_code = admins[i].convert_admin_pcode_length(
+                        countryiso3, adm_code, parent=parent
                     )
-                    adm_code = pcode
+                    if adj_adm_code:
+                        warnings.append(f"PCode length {adm_code}->{adj_adm_code}")
+                        adm_code = adj_adm_code
+                    else:
+                        adm_code = check_unknown_pcode(adm_code, pcode)
                 else:
-                    warnings.append(f"PCode unknown {adm_code}->''")
-                    adm_code = ""
+                    adm_code = check_unknown_pcode(adm_code, pcode)
             elif pcode and adm_code != pcode:
                 if child:
-                    warnings.append(
-                        f"PCode mismatch {adm_code}->{pcode} ({warntxt})"
-                    )
+                    warnings.append(f"PCode mismatch {adm_code}->{pcode} ({warntxt})")
                     adm_code = pcode
                 else:
-                    warnings.append(
-                        f"PCode mismatch {adm_code} != {provider_adm_name}"
-                    )
+                    warnings.append(f"PCode mismatch {adm_code} != {provider_adm_name}")
         elif pcode:
             adm_code = pcode
         else:

hdx/scraper/framework/utilities/lookup.py CHANGED Viewed

@@ -21,9 +21,7 @@ class Lookup:
     """
     def __init__(self, yaml_config_path: str, classobject: Type):
-        configuration = load_yaml(
-            script_dir_plus_file(yaml_config_path, classobject)
-        )
+        configuration = load_yaml(script_dir_plus_file(yaml_config_path, classobject))
         self._configuration = configuration
         initial_lookup = configuration.get("initial_lookup", {})
         self._code_lookup = copy(initial_lookup)
@@ -91,9 +89,7 @@ class Lookup:
             unmatched=self._unmatched,
         )
-    def get_name(
-        self, code: str, default: Optional[str] = None
-    ) -> Optional[str]:
+    def get_name(self, code: str, default: Optional[str] = None) -> Optional[str]:
         """Get name from code
         Args:

hdx/scraper/framework/utilities/reader.py CHANGED Viewed

@@ -194,9 +194,7 @@ class Read(Retrieve):
             today=self.today,
         )
-    def setup_tabular(
-        self, datasetinfo: Dict, kwargs: Dict
-    ) -> Union[str, List]:
+    def setup_tabular(self, datasetinfo: Dict, kwargs: Dict) -> Union[str, List]:
         """Setup kwargs for tabular source eg. csv, xls, xlsx from
         datasetinfo and return url.
@@ -330,9 +328,7 @@ class Read(Retrieve):
             for file_path in sorted(glob.glob(f"{saved_path}_*.json")):
                 datasets.append(Dataset.load_from_json(file_path))
         else:
-            datasets = Dataset.search_in_hdx(
-                query, configuration, page_size, **kwargs
-            )
+            datasets = Dataset.search_in_hdx(query, configuration, page_size, **kwargs)
             if self.save:
                 for i, dataset in enumerate(datasets):
                     file_path = f"{saved_path}_{i}.json"
@@ -382,9 +378,7 @@ class Read(Retrieve):
         path = self.download_file(url, **kwargs)
         return url, path
-    def download_resource(
-        self, resource: Resource, **kwargs: Any
-    ) -> Tuple[str, str]:
+    def download_resource(self, resource: Resource, **kwargs: Any) -> Tuple[str, str]:
         """Download HDX resource os a file and return the url downloaded and
         the path of the file. The filename of the file comes from the name and
         format.
@@ -471,9 +465,7 @@ class Read(Retrieve):
             data.display_tags
             return data
         except hxl.HXLException:
-            logger.warning(
-                f"Could not process {url}. Maybe there are no HXL tags?"
-            )
+            logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
             return None
         except Exception:
             logger.exception(f"Error reading {url}!")
@@ -495,14 +487,10 @@ class Read(Retrieve):
             Optional[Dict]: Information about file or None
         """
         try:
-            _, path = self.construct_filename_and_download(
-                name, format, url, **kwargs
-            )
+            _, path = self.construct_filename_and_download(name, format, url, **kwargs)
             return hxl.info(path, InputOptions(allow_local=True))
         except hxl.HXLException:
-            logger.warning(
-                f"Could not process {url}. Maybe there are no HXL tags?"
-            )
+            logger.warning(f"Could not process {url}. Maybe there are no HXL tags?")
             return None
         except Exception:
             logger.exception(f"Error reading {url}!")
@@ -586,23 +574,21 @@ class Read(Retrieve):
                 else:
                     url = resource["url"]  # otherwise set the url key in
                     # datasetinfo to the resource url (by setting url here)
-                datasetinfo["hapi_resource_metadata"] = (
-                    self.get_hapi_resource_metadata(resource)
+                datasetinfo["hapi_resource_metadata"] = self.get_hapi_resource_metadata(
+                    resource
                 )
                 datasetinfo["url"] = url
             if "source_date" not in datasetinfo:
-                datasetinfo["source_date"] = (
-                    get_startend_dates_from_time_period(
-                        dataset, today=self.today
-                    )
+                datasetinfo["source_date"] = get_startend_dates_from_time_period(
+                    dataset, today=self.today
                 )
             if "source" not in datasetinfo:
                 datasetinfo["source"] = dataset["dataset_source"]
             if "source_url" not in datasetinfo:
                 datasetinfo["source_url"] = dataset.get_hdx_url()
             Sources.standardise_datasetinfo_source_date(datasetinfo)
-            datasetinfo["hapi_dataset_metadata"] = (
-                self.get_hapi_dataset_metadata(dataset, datasetinfo)
+            datasetinfo["hapi_dataset_metadata"] = self.get_hapi_dataset_metadata(
+                dataset, datasetinfo
             )
             return resource
@@ -669,18 +655,14 @@ class Read(Retrieve):
         Returns:
             Tuple[List[str],Iterator[Dict]]: Tuple (headers, iterator where each row is a dictionary)
         """
-        resource = self.read_hdx_metadata(
-            datasetinfo, configuration=configuration
-        )
+        resource = self.read_hdx_metadata(datasetinfo, configuration=configuration)
         filename = kwargs.get("filename")
         if filename:
             del kwargs["filename"]
             datasetinfo["filename"] = filename
         filename = datasetinfo.get("filename")
         if resource and not filename:
-            filename = self.construct_filename(
-                resource["name"], resource.get_format()
-            )
+            filename = self.construct_filename(resource["name"], resource.get_format())
             file_prefix = kwargs.get("file_prefix")
             if not file_prefix:
                 file_prefix = datasetinfo.get("file_prefix")
@@ -708,13 +690,9 @@ class Read(Retrieve):
         format = datasetinfo["format"]
         if format in ["json", "csv", "xls", "xlsx"]:
             if "dataset" in datasetinfo:
-                headers, iterator = self.read_hdx(
-                    datasetinfo, configuration, **kwargs
-                )
+                headers, iterator = self.read_hdx(datasetinfo, configuration, **kwargs)
             else:
                 headers, iterator = self.read_tabular(datasetinfo, **kwargs)
         else:
-            raise ValueError(
-                f"Invalid format {format} for {datasetinfo['name']}!"
-            )
+            raise ValueError(f"Invalid format {format} for {datasetinfo['name']}!")
         return headers, iterator

hdx/scraper/framework/utilities/sources.py CHANGED Viewed

@@ -91,9 +91,7 @@ class Sources:
                 else:
                     if isinstance(value, dict):
                         for startend, date in value.items():
-                            set_source_date(
-                                date, hxltag=key, startend=startend
-                            )
+                            set_source_date(date, hxltag=key, startend=startend)
                     else:
                         set_source_date(value, hxltag=key)
         else:
@@ -214,9 +212,7 @@ class Sources:
                 index = hxltags.index(hxltag)
                 sources[index] = source
             else:
-                logger.warning(
-                    f"Keeping existing source information for {hxltag}!"
-                )
+                logger.warning(f"Keeping existing source information for {hxltag}!")
         else:
             hxltags.append(hxltag)
             sources.append(source)
@@ -282,9 +278,7 @@ class Sources:
         if no_sources:
             source_configuration["no_sources"] = True
             return source_configuration
-        source_configuration["should_overwrite_sources"] = (
-            should_overwrite_sources
-        )
+        source_configuration["should_overwrite_sources"] = should_overwrite_sources
         if suffix_attribute:
             source_configuration["suffix_attribute"] = suffix_attribute
             return source_configuration

hdx/scraper/framework/utilities/writer.py CHANGED Viewed

@@ -309,9 +309,7 @@ class Writer:
             fns.append(region_fn)
-        rows = self.runner.get_rows(
-            level, countries, headers, fns, names=names
-        )
+        rows = self.runner.get_rows(level, countries, headers, fns, names=names)
         if rows:
             self.update(tab, rows)

{hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hdx-python-scraper
-Version: 2.6.2
+Version: 2.6.4
 Summary: HDX Python scraper utilities to assemble data from multiple sources
 Project-URL: Homepage, https://github.com/OCHA-DAP/hdx-python-scraper
 Author-email: Michael Rans <rans@email.com>
@@ -26,15 +26,18 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.8
 Requires-Dist: gspread
-Requires-Dist: hdx-python-api>=6.3.8
-Requires-Dist: hdx-python-country>=3.8.8
-Requires-Dist: hdx-python-utilities>=3.8.3
+Requires-Dist: hdx-python-api>=6.3.9
+Requires-Dist: hdx-python-country>=3.9.2
+Requires-Dist: hdx-python-utilities>=3.8.6
 Requires-Dist: regex
 Provides-Extra: dev
 Requires-Dist: pre-commit; extra == 'dev'
+Provides-Extra: docs
+Requires-Dist: mkapi; extra == 'docs'
 Provides-Extra: pandas
-Requires-Dist: pandas>=2.2.2; extra == 'pandas'
+Requires-Dist: pandas>=2.2.3; extra == 'pandas'
 Provides-Extra: test
+Requires-Dist: pandas>=2.2.3; extra == 'test'
 Requires-Dist: pytest; extra == 'test'
 Requires-Dist: pytest-cov; extra == 'test'
 Description-Content-Type: text/markdown

hdx_python_scraper-2.6.4.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,31 @@
+hdx/scraper/framework/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
+hdx/scraper/framework/_version.py,sha256=a5nalDjLY2yvq7ieXFfR076fN3sJh2mCxFSXqRSIcE0,511
+hdx/scraper/framework/base_scraper.py,sha256=bv9FguvOD40nulgC16zmOsxyg3iAPUDn_zM1V-MVvSY,15292
+hdx/scraper/framework/runner.py,sha256=M6YqiZvOvCewlGn2E0ksslkK7ZHRiWGnRVwQjus805c,53087
+hdx/scraper/framework/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hdx/scraper/framework/outputs/base.py,sha256=VASnjmw8yM_-c0-G5Ku2gDTuQiYcFxkE27i7jWJfg4c,2563
+hdx/scraper/framework/outputs/excelfile.py,sha256=dkyaI3nKUTn_tpVTbb7NB8F3sKzZQ-7U8l825EdzQ18,2196
+hdx/scraper/framework/outputs/googlesheets.py,sha256=--mri4hhWslfshcVExlobnHgt87aaAtGrmzW2RAk4Ic,3040
+hdx/scraper/framework/outputs/json.py,sha256=NPOMfrG0brIPf3B7NENi-6LdCDbso-K-nPMAVMVa7JU,9455
+hdx/scraper/framework/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+hdx/scraper/framework/scrapers/aggregator.py,sha256=pPKu8QR0_GWRhpSqPBA6bd0KKq-WgN6AEtv9_8gj9d8,14858
+hdx/scraper/framework/scrapers/configurable_scraper.py,sha256=OW_Y5ESVb91hWMtpvZQAqXxP-VyPt9Af5IGvZk0xSuE,19994
+hdx/scraper/framework/scrapers/resource_downloader.py,sha256=ZuSc5L4X4LWcWKniHS5BDmMuM97H8kWCzB1H_PNceGc,1548
+hdx/scraper/framework/scrapers/rowparser.py,sha256=_xImgr6gXXfLRcLu1xEcXmSr6OCCsctkJXfKuzkw97w,15515
+hdx/scraper/framework/scrapers/timeseries.py,sha256=FYk5-MoOnvAa6ym5UWNUWHLpKmJNgHBk1La_nHap18c,3004
+hdx/scraper/framework/utilities/__init__.py,sha256=-zOJzat-fbv427FBIKDnWLs2QStXTBZahiNy_-pgPOc,2390
+hdx/scraper/framework/utilities/fallbacks.py,sha256=t8oKE3_3I6fX4-kzvAdRIhdjg-9vWBGE6shd2_EvC4c,6184
+hdx/scraper/framework/utilities/hapi_admins.py,sha256=XuZXAnkooCLg4tSKZfPqY4bK0rzBALejPxac7IFdwW4,4093
+hdx/scraper/framework/utilities/lookup.py,sha256=WnZa3lY4matfAIsr-GnxurmYndBVbtzbcM9Twm7-4Ho,3483
+hdx/scraper/framework/utilities/org_type.py,sha256=euQyRV01yA8kJ3nMFvZxnTRLnvCuxgV1ZZQx8gEOB8Y,183
+hdx/scraper/framework/utilities/org_type_configuration.yaml,sha256=tTordLPgnE90FSJzbVJPEnE06KyhlQBsPlIu1IAw3iw,1841
+hdx/scraper/framework/utilities/reader.py,sha256=VYi92sAxqZD0nFn9q8OSEEcxXtELNB6DNCF838ES0x4,27415
+hdx/scraper/framework/utilities/region_lookup.py,sha256=82tl1A2GLcxhiTqd1etTpxE5T6anbM-9dHih2ZlN00o,3916
+hdx/scraper/framework/utilities/sector.py,sha256=XGysivvPhTqQfK6z1y96sDJATk3zx7sS_qGqCa4PbaI,177
+hdx/scraper/framework/utilities/sector_configuration.yaml,sha256=VKddsahminPOc3QKKieb1DvaYXkdPdhT5cPAL9_HjDw,4940
+hdx/scraper/framework/utilities/sources.py,sha256=0aW0IbH8nsViDixjD-fIh3gO86vwklYkPU8cXxmJkz4,11379
+hdx/scraper/framework/utilities/writer.py,sha256=yJQ_HcJj-l6DJW5Fl8nr1f3wLIJjogxmF22IU4ysj4c,16673
+hdx_python_scraper-2.6.4.dist-info/METADATA,sha256=fnetPHzz1YtmMbyuOzA97cYlh2GBNowGWiii_adRU-k,3466
+hdx_python_scraper-2.6.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+hdx_python_scraper-2.6.4.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
+hdx_python_scraper-2.6.4.dist-info/RECORD,,

hdx_python_scraper-2.6.2.dist-info/RECORD DELETED Viewed

@@ -1,31 +0,0 @@
-hdx/scraper/framework/__init__.py,sha256=11ozJKiUsqDCZ3_mcAHhGYUyGK_Unl54djVSBBExFB4,59
-hdx/scraper/framework/_version.py,sha256=U2b7313v-bM1h69WtyleJ6hXm9RX-9buCsEK4Qgb1fg,411
-hdx/scraper/framework/base_scraper.py,sha256=J7AHhDFBehENragRvpZnV8Qi7IcfFql9U_UU1svNr5o,15424
-hdx/scraper/framework/runner.py,sha256=an0c_tz46PCnyyUk0dgDV8xfkb7F3LHMUIhPyPSU8sM,53499
-hdx/scraper/framework/outputs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hdx/scraper/framework/outputs/base.py,sha256=VASnjmw8yM_-c0-G5Ku2gDTuQiYcFxkE27i7jWJfg4c,2563
-hdx/scraper/framework/outputs/excelfile.py,sha256=dkyaI3nKUTn_tpVTbb7NB8F3sKzZQ-7U8l825EdzQ18,2196
-hdx/scraper/framework/outputs/googlesheets.py,sha256=gw9VM2UM3D6N7saUWaXiU2H_ihZn40b6J227I7t-SQs,3086
-hdx/scraper/framework/outputs/json.py,sha256=nINV-P5gTGCrtq-zEWYT_Si4ggqhJvUWHCQHxTHUh64,9493
-hdx/scraper/framework/scrapers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hdx/scraper/framework/scrapers/aggregator.py,sha256=zC6kmk6hC7xZ-j72R75VEppJKk6kIF5RE18KYBoavX0,14972
-hdx/scraper/framework/scrapers/configurable_scraper.py,sha256=Q9AeThIHZwWQetCejrWKOK3RH1Fh1RqXhbU4qDbq2d4,20608
-hdx/scraper/framework/scrapers/resource_downloader.py,sha256=ZuSc5L4X4LWcWKniHS5BDmMuM97H8kWCzB1H_PNceGc,1548
-hdx/scraper/framework/scrapers/rowparser.py,sha256=j_FxNYzhSB9QmBAzqNhuNVCjlJP4cJKS1l4qLzXG5qE,15613
-hdx/scraper/framework/scrapers/timeseries.py,sha256=w0Ejg4nXaLSuq32zx7vlULMZuDZaTqRO94Dm-VKA9co,3026
-hdx/scraper/framework/utilities/__init__.py,sha256=-zOJzat-fbv427FBIKDnWLs2QStXTBZahiNy_-pgPOc,2390
-hdx/scraper/framework/utilities/fallbacks.py,sha256=t8oKE3_3I6fX4-kzvAdRIhdjg-9vWBGE6shd2_EvC4c,6184
-hdx/scraper/framework/utilities/hapi_admins.py,sha256=k_VZtDuSDL3h_0RwZqu9x_fckRnvfQ62KPv5Q-IMVEo,3664
-hdx/scraper/framework/utilities/lookup.py,sha256=4IkO35eBN5xz2H5y688C3L80zEvTjrHQgYiM4dFVpPo,3519
-hdx/scraper/framework/utilities/org_type.py,sha256=euQyRV01yA8kJ3nMFvZxnTRLnvCuxgV1ZZQx8gEOB8Y,183
-hdx/scraper/framework/utilities/org_type_configuration.yaml,sha256=tTordLPgnE90FSJzbVJPEnE06KyhlQBsPlIu1IAw3iw,1841
-hdx/scraper/framework/utilities/reader.py,sha256=wM878LqmsuDfsbIGmDmrESRfSfRhJS2POF-dnQ1Ix58,27735
-hdx/scraper/framework/utilities/region_lookup.py,sha256=82tl1A2GLcxhiTqd1etTpxE5T6anbM-9dHih2ZlN00o,3916
-hdx/scraper/framework/utilities/sector.py,sha256=XGysivvPhTqQfK6z1y96sDJATk3zx7sS_qGqCa4PbaI,177
-hdx/scraper/framework/utilities/sector_configuration.yaml,sha256=VKddsahminPOc3QKKieb1DvaYXkdPdhT5cPAL9_HjDw,4940
-hdx/scraper/framework/utilities/sources.py,sha256=3miKn_iruWpfpBA-7R9jFt6_EdfX1zvW4PvjifOCd7s,11503
-hdx/scraper/framework/utilities/writer.py,sha256=YjOhVo3Ks0I5WH7oyM2Q7fO6ImGabYZ2CBhbYw_A0Kk,16695
-hdx_python_scraper-2.6.2.dist-info/METADATA,sha256=KXHMz2OhUVSO1K02K7_bAdY91Nt0sagGcYcIbjr8iKU,3361
-hdx_python_scraper-2.6.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-hdx_python_scraper-2.6.2.dist-info/licenses/LICENSE,sha256=wc-4GpMn-ODs-U_bTe1YCiPVgvcjzrpYOx2wPuyAeII,1079
-hdx_python_scraper-2.6.2.dist-info/RECORD,,

{hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{hdx_python_scraper-2.6.2.dist-info → hdx_python_scraper-2.6.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

hdx-python-scraper 2.6.2__py3-none-any.whl → 2.6.4__py3-none-any.whl

hdx-python-scraper 2.6.2__py3-none-any.whl → 2.6.4__py3-none-any.whl