PyPI - hestia-earth-models - Versions diffs - 0.61.7__py3-none-any.whl → 0.61.8__py3-none-any.whl - Mend

hestia-earth-models 0.61.7__py3-none-any.whl → 0.61.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hestia-earth-models might be problematic. Click here for more details.

Files changed (43) hide show

hestia_earth/models/utils/descriptive_stats.py ADDED Viewed

@@ -0,0 +1,285 @@
+from collections.abc import Iterable
+from enum import Enum
+from functools import reduce
+from numpy import abs, around, exp, float64, inf, pi, sign, sqrt
+from numpy.typing import NDArray
+from typing import Optional, Union
+def calc_z_critical(
+    confidence_interval: float,
+    n_sided: int = 2
+) -> float64:
+    """
+    Convert a confidence interval into the matching z-critical value of the standard normal distribution.
+    Parameters
+    ----------
+    confidence_interval : float
+        The confidence interval as a percentage between 0 and 100%.
+    n_sided : int, optional
+        The number of tails the significance level is shared between (default value = `2`).
+    Returns
+    -------
+    float64
+        The standard normal quantile evaluated at `1 - alpha / n_sided`, where `alpha = 1 - confidence_interval / 100`.
+    """
+    # Probability mass left in each tail once the significance level is split between the tails.
+    tail_probability = (1 - confidence_interval / 100) / n_sided
+    return _normal_ppf(1 - tail_probability)
+def _normal_ppf(q: float64, tol: float64 = 1e-10) -> float64:
+    """
+    Calculates the percent point function (PPF), also known as the inverse cumulative distribution function (CDF), of a
+    standard normal distribution using the Newton-Raphson method.
+    Parameters
+    ----------
+    q : float64
+        The quantile at which to evaluate the PPF. Must lie in the closed interval [0, 1].
+    tol : float64, optional
+        The tolerance for the Newton-Raphson method; iteration stops once an update is smaller than this value.
+        Defaults to 1e-10.
+    Returns
+    -------
+    float64
+        The PPF value at the given quantile (`inf` for `q == 1`, `-inf` for `q == 0`).
+    Raises
+    ------
+    ValueError
+        If `q` is outside [0, 1] (including NaN); no finite root exists and the iteration would diverge.
+    """
+    INITIAL_GUESS = 0
+    MAX_ITER = 100
+    if not 0 <= q <= 1:
+        raise ValueError(f"q must be between 0 and 1 (inclusive), got {q!r}")
+    if q == 1:
+        return inf
+    if q == 0:
+        return -inf
+    x = INITIAL_GUESS
+    for _ in range(MAX_ITER):
+        x_new = x - (_normal_cdf(x) - q) / _normal_pdf(x)
+        # Stop as soon as the update falls below the tolerance. Written as `not >=` so a NaN update also stops the
+        # loop and the last accepted estimate is returned.
+        if not abs(x_new - x) >= tol:
+            break
+        x = x_new
+    return x
+def _normal_cdf(x: float64) -> float64:
+    """
+    Evaluate the cumulative distribution function (CDF) of the standard normal distribution at a single point, using
+    the module's own error function approximation (`_erf`).
+    Parameters
+    ----------
+    x : float64
+        The point at which to evaluate the CDF.
+    Returns
+    -------
+    float64
+        The CDF value at the given point.
+    """
+    # Phi(x) = (1 + erf(x / sqrt(2))) / 2
+    erf_term = _erf(x / sqrt(2))
+    return 0.5 * (1 + erf_term)
+def _erf(x: float64) -> float64:
+    """
+    Approximate the error function with the polynomial approximation from Abramowitz and Stegun, formula 7.1.26.
+    Parameters
+    ----------
+    x : float64
+        The input value.
+    Returns
+    -------
+    float64
+        The approximated value of the error function.
+    """
+    # Polynomial coefficients, highest order first, and the scaling constant `p` of formula 7.1.26.
+    COEFFICIENTS = (1.061405429, -1.453152027, 1.421413741, -0.284496736, 0.254829592)
+    P = 0.3275911
+    # The formula is evaluated on the magnitude of x; the sign is re-applied at the end.
+    magnitude = abs(x)
+    t = 1.0 / (1.0 + P * magnitude)
+    # Horner evaluation of the polynomial in t.
+    leading, *remaining = COEFFICIENTS
+    poly = leading
+    for coefficient in remaining:
+        poly = poly * t + coefficient
+    approximation = 1.0 - poly * t * exp(-magnitude * magnitude)
+    return sign(x) * approximation
+def _normal_pdf(x: float64) -> float64:
+    """
+    Evaluate the probability density function (PDF) of the standard normal distribution at a single point.
+    Parameters
+    ----------
+    x : float64
+        The point at which to evaluate the PDF.
+    Returns
+    -------
+    float64
+        The PDF value at the given point.
+    """
+    normalisation = 1 / sqrt(2 * pi)
+    return normalisation * exp(-0.5 * x**2)
+def _calc_confidence_level(
+    z_critical: float64,
+    n_sided: int = 2
+) -> float64:
+    """
+    Convert a z-critical value back into the confidence level it corresponds to.
+    Parameters
+    ----------
+    z_critical : np.float64
+        The z-critical value of the standard normal distribution.
+    n_sided : int, optional
+        The number of tails (default value = `2`).
+    Returns
+    -------
+    np.float64
+        The confidence level as a percentage, computed as `(1 - alpha) * 100`.
+    """
+    # Probability mass beyond the z-critical value in a single tail.
+    tail_probability = 1 - _normal_cdf(z_critical)
+    return (1 - tail_probability * n_sided) * 100
+def calc_required_iterations_monte_carlo(
+    confidence_level: float,
+    precision: float,
+    sd: float
+) -> int:
+    """
+    Estimate how many Monte Carlo iterations are needed for the sample mean to reach a desired precision at a given
+    confidence level.
+    Parameters
+    ----------
+    confidence_level : float
+        The confidence level, as a percentage out of 100, that the precision should be subject to (i.e., we are x%
+        sure that the sample mean deviates from the true population mean by less than the desired precision).
+    precision : float
+        The desired precision as a floating point value (i.e., if the Monte Carlo simulation will be used to estimate
+        `organicCarbonPerHa` to a precision of 100 kg C ha-1 this value should be 100).
+    sd : float
+        The standard deviation of the sample. This can be estimated by running the model 500 times (a number that does
+        not take too much time to run but is large enough for the sample standard deviation to converge reasonably
+        well).
+    Returns
+    -------
+    int
+        The required number of iterations, rounded to the nearest integer.
+    """
+    # n = (z * sd / precision) ** 2
+    z_critical = calc_z_critical(confidence_level)
+    scaled_deviation = (sd * z_critical) / precision
+    return round(scaled_deviation ** 2)
+def calc_confidence_level_monte_carlo(
+    n_iterations: int,
+    precision: float,
+    sd: float
+) -> float:
+    """
+    Calculate the confidence level that the sample mean produced by a Monte Carlo simulation deviates from the true
+    population mean by less than the desired precision.
+    Parameters
+    ----------
+    n_iterations : int
+        The number of iterations that the Monte Carlo simulation was run for.
+    precision : float
+        The desired precision as a floating point value (i.e., if the Monte Carlo simulation will be used to estimate
+        `organicCarbonPerHa` to a precision of 100 kg C ha-1 this value should be 100).
+    sd : float
+        The standard deviation of the sample.
+    Returns
+    -------
+    float
+        The confidence level, as a percentage out of 100, that the precision is subject to (i.e., we are x% sure
+        that the sample mean deviates from the true population mean by less than the desired precision).
+    """
+    # z = precision * sqrt(n) / sd
+    z_critical = precision * sqrt(n_iterations) / sd
+    return _calc_confidence_level(z_critical)
+def calc_precision_monte_carlo(
+    confidence_level: float,
+    n_iterations: int,
+    sd: float
+) -> float:
+    """
+    Calculate the +/- precision of the sample mean of a Monte Carlo simulation for a desired confidence level.
+    Parameters
+    ----------
+    confidence_level : float
+        The confidence level, as a percentage out of 100, that the precision should be subject to (i.e., we are x%
+        sure that the sample mean deviates from the true population mean by less than the desired precision).
+    n_iterations : int
+        The number of iterations that the Monte Carlo simulation was run for.
+    sd : float
+        The standard deviation of the sample.
+    Returns
+    -------
+    float
+        The precision of the sample mean estimated by the Monte Carlo model as a floating point value with the same
+        units as the estimated mean.
+    """
+    # precision = z * sd / sqrt(n)
+    scaled_sd = sd * calc_z_critical(confidence_level)
+    return scaled_sd / sqrt(n_iterations)
+def calc_descriptive_stats(
+    arr: NDArray,
+    stats_definition: Union[Enum, str],
+    axis: Optional[int] = None,
+    decimals: int = 6
+) -> dict:
+    """
+    Calculate the descriptive stats (mean, standard deviation, min, max) of an array, round them to the specified
+    number of decimal places and return them formatted for a HESTIA node.
+    Parameters
+    ----------
+    arr : NDArray
+        The array to summarise. Any dimensionality is accepted.
+    stats_definition : Enum | str
+        The stats definition to report; the `.value` of an `Enum` member is used, a string is passed through as is.
+    axis : int | None
+        The axis along which the stats are calculated. `None` (default) summarises the whole array into single values.
+    decimals : int
+        The number of decimal places to round the stats to.
+    Returns
+    -------
+    dict
+        A dict with the keys `value`, `sd`, `min`, `max` (lists of rounded stats), `statsDefinition` and
+        `observations` (the number of array elements that contributed to each stat).
+    """
+    def as_list(stat) -> list:
+        """Expand an array of stats into a list; wrap a scalar stat in a single-item list."""
+        return list(stat) if isinstance(stat, Iterable) else [stat]
+    value = around(arr.mean(axis=axis), decimals)
+    sd = around(arr.std(axis=axis), decimals)
+    min_ = around(arr.min(axis=axis), decimals)
+    max_ = around(arr.max(axis=axis), decimals)
+    if axis is None:
+        observations = [arr.size]
+    else:
+        # Each stat summarises `arr.shape[axis]` elements and there is one stat per remaining slice. Deriving this
+        # from the shape avoids unpacking `arr.shape` into exactly two values, which fails for non-2-D arrays.
+        n_along_axis = arr.shape[axis]
+        observations = [n_along_axis] * (arr.size // n_along_axis)
+    return {
+        "value": as_list(value),
+        "sd": as_list(sd),
+        "min": as_list(min_),
+        "max": as_list(max_),
+        "statsDefinition": stats_definition.value if isinstance(stats_definition, Enum) else stats_definition,
+        "observations": observations
+    }

hestia_earth/models/utils/emission.py CHANGED Viewed

@@ -1,13 +1,19 @@
-from hestia_earth.schema import SchemaType
+from collections.abc import Iterable
+from typing import Optional, Union
+from hestia_earth.schema import EmissionMethodTier, SchemaType
 from hestia_earth.utils.api import download_hestia
 from hestia_earth.utils.model import linked_node
 from hestia_earth.utils.lookup import get_table_value, download_lookup, column_name
-from . import _term_id, _include_methodModel
+from . import _term_id, _include_methodModel, flatten_args
 from .blank_node import find_terms_value
 from .constant import Units, get_atomic_conversion
+EMISSION_METHOD_TIERS = [e.value for e in EmissionMethodTier]
 def _new_emission(term, model=None):
     node = {'@type': SchemaType.EMISSION.value}
     node['term'] = linked_node(term if isinstance(term, dict) else download_hestia(_term_id(term)))
@@ -32,3 +38,68 @@ def get_nh3_no3_nox_to_n(cycle: dict, nh3_term_id: str, no3_term_id: str, nox_te
     nox = None if nox is None else nox / get_atomic_conversion(Units.KG_NOX, Units.TO_N)
     return (nh3, no3, nox)
+# Order matters: `min_emission_method_tier` compares tiers by their index in this list, so earlier entries
+# outrank later ones.
+_EMISSION_METHOD_TIER_RANKING = [
+    EmissionMethodTier.MEASURED,
+    EmissionMethodTier.TIER_3,
+    EmissionMethodTier.TIER_2,
+    EmissionMethodTier.TIER_1,
+    EmissionMethodTier.BACKGROUND,
+    EmissionMethodTier.NOT_RELEVANT
+]
+"""
+A ranking of `EmissionMethodTier`s from strongest to weakest.
+"""
+# Accepted argument shapes for the tier helpers: a single tier (enum member or string) or an iterable of them.
+_EmissionMethodTiers = Union[EmissionMethodTier, str, Iterable[Union[EmissionMethodTier, str]]]
+"""
+A type alias for a single emission method tier, as either an EmissionMethodTier enum or string, or multiple emission
+method tiers, as either an iterable of EmissionMethodTier enums or strings.
+"""
+def min_emission_method_tier(*methods: _EmissionMethodTiers) -> EmissionMethodTier:
+    """
+    Get the minimum ranking emission method tier from the provided methods.
+    n.b., `max` function is used as weaker methods have higher indices.
+    Parameters
+    ----------
+    *methods : EmissionMethodTier | str | Iterable[EmissionMethodTier] | Iterable[str]
+        Emission method tiers or iterables of emission method tiers. Values that cannot be converted to an
+        `EmissionMethodTier` are ignored.
+    Returns
+    -------
+    EmissionMethodTier
+        The emission method tier with the minimum ranking, or the weakest tier in the ranking if no valid tier was
+        provided.
+    """
+    # `to_emission_method_tier` returns `None` for unrecognised strings; these must be dropped because
+    # `list.index(None)` would raise a `ValueError` while ranking.
+    candidate_tiers = (to_emission_method_tier(arg) for arg in flatten_args(methods))
+    valid_tiers = [tier for tier in candidate_tiers if tier is not None]
+    return max(
+        valid_tiers,
+        key=_EMISSION_METHOD_TIER_RANKING.index,
+        default=_EMISSION_METHOD_TIER_RANKING[-1]
+    )
+def to_emission_method_tier(method: Union[EmissionMethodTier, str]) -> Optional[EmissionMethodTier]:
+    """
+    Coerce the input into an `EmissionMethodTier` where possible.
+    Parameters
+    ----------
+    method : EmissionMethodTier | str
+        The emission method tier as either a `str` or `EmissionMethodTier`.
+    Returns
+    -------
+    EmissionMethodTier | None
+        The input itself if it is already an `EmissionMethodTier`, the matching member if the input is one of the
+        values in `EMISSION_METHOD_TIERS`, otherwise `None`.
+    """
+    if isinstance(method, EmissionMethodTier):
+        return method
+    if method in EMISSION_METHOD_TIERS:
+        return EmissionMethodTier(method)
+    return None

hestia_earth/models/utils/inorganicFertiliser.py CHANGED Viewed

@@ -31,14 +31,14 @@ def get_NH3_emission_factor(term_id: str, soilPh: float, temperature: float):
     soilPh_key = _get_soilPh_lookup_key(soilPh)
     temperature_key = _get_temperature_lookup_key(temperature)
     data = get_term_lookup(term_id, f"NH3_emissions_factor_{soilPh_key}")
-    return safe_parse_float(extract_grouped_data(data, temperature_key), 1)
+    return safe_parse_float(extract_grouped_data(data, temperature_key), None)
 def get_country_breakdown(model: str, term_id: str, country_id: str, col_name: str):
     lookup = download_lookup(BREAKDOWN_LOOKUP)
     value = get_table_value(lookup, 'termid', country_id, column_name(col_name))
     debugMissingLookup(BREAKDOWN_LOOKUP, 'termid', country_id, col_name, value, model=model, term=term_id)
-    return safe_parse_float(value, 1)
+    return safe_parse_float(value, None)
 def get_cycle_inputs(cycle: dict):

hestia_earth/models/utils/measurement.py CHANGED Viewed

@@ -1,18 +1,19 @@
+from collections import defaultdict
+from collections.abc import Iterable
 from functools import reduce
 from dateutil import parser
 from statistics import mode, mean
-from typing import Any, Union
+from typing import Any, Optional,  Union
-from hestia_earth.schema import SchemaType
+from hestia_earth.schema import MeasurementMethodClassification, SchemaType
 from hestia_earth.utils.api import download_hestia
 from hestia_earth.utils.model import linked_node
 from hestia_earth.utils.tools import non_empty_list, flatten, safe_parse_float
 from hestia_earth.utils.date import diff_in_days
-from . import _term_id, _include_method
+from . import _term_id, _include_method, flatten_args
 from .term import get_lookup_value
-# from hestia_earth.models.site.utils import _has_all_months
 # TODO: verify those values
 MAX_DEPTH = 1000
@@ -24,6 +25,8 @@ MEASUREMENT_REDUCE = {
     'sum': lambda value: sum(value)
 }
+MEASUREMENT_METHOD_CLASSIFICATIONS = [e.value for e in MeasurementMethodClassification]
 def _new_measurement(term, model=None):
     node = {'@type': SchemaType.MEASUREMENT.value}
@@ -207,3 +210,114 @@ def most_relevant_measurement_value_by_depth_and_date(
     nearest_value, nearest_date = min(zip(values, dates), key=lambda i: date_distance(i[1]), default=(default, None))
     return nearest_value, nearest_date
+# Order matters: `min_measurement_method_classification` compares classifications by their index in this list,
+# so earlier entries outrank later ones.
+_MEASUREMENT_METHOD_CLASSIFICATION_RANKING = [
+    MeasurementMethodClassification.ON_SITE_PHYSICAL_MEASUREMENT,
+    MeasurementMethodClassification.MODELLED_USING_OTHER_MEASUREMENTS,
+    MeasurementMethodClassification.TIER_3_MODEL,
+    MeasurementMethodClassification.TIER_2_MODEL,
+    MeasurementMethodClassification.TIER_1_MODEL,
+    MeasurementMethodClassification.PHYSICAL_MEASUREMENT_ON_NEARBY_SITE,
+    MeasurementMethodClassification.GEOSPATIAL_DATASET,
+    MeasurementMethodClassification.REGIONAL_STATISTICAL_DATA,
+    MeasurementMethodClassification.COUNTRY_LEVEL_STATISTICAL_DATA,
+    MeasurementMethodClassification.EXPERT_OPINION,
+    MeasurementMethodClassification.UNSOURCED_ASSUMPTION
+]
+"""
+A ranking of `MeasurementMethodClassification`s from strongest to weakest.
+"""
+# Accepted argument shapes for the classification helpers: a single classification (enum member or string) or an
+# iterable of them.
+_MeasurementMethodClassifications = Union[
+    MeasurementMethodClassification, str, Iterable[Union[MeasurementMethodClassification, str]]
+]
+"""
+A type alias for a single measurement method classification, as either an MeasurementMethodClassification enum or
+string, or multiple measurement method classification, as either an iterable of MeasurementMethodClassification enums
+or strings.
+"""
+def min_measurement_method_classification(
+    *methods: _MeasurementMethodClassifications
+) -> MeasurementMethodClassification:
+    """
+    Get the minimum ranking measurement method from the provided methods.
+    n.b., `max` function is used as weaker methods have higher indices.
+    Parameters
+    ----------
+    *methods : MeasurementMethodClassification | str | Iterable[MeasurementMethodClassification] | Iterable[str]
+        Measurement method classifications or iterables of measurement method classification. Values that cannot be
+        converted to a `MeasurementMethodClassification` are ignored.
+    Returns
+    -------
+    MeasurementMethodClassification
+        The measurement method classification with the minimum ranking, or the weakest classification in the ranking
+        if no valid classification was provided.
+    """
+    # `to_measurement_method_classification` returns `None` for unrecognised strings; these must be dropped because
+    # `list.index(None)` would raise a `ValueError` while ranking.
+    candidate_methods = (to_measurement_method_classification(arg) for arg in flatten_args(methods))
+    valid_methods = [method for method in candidate_methods if method is not None]
+    return max(
+        valid_methods,
+        key=_MEASUREMENT_METHOD_CLASSIFICATION_RANKING.index,
+        default=_MEASUREMENT_METHOD_CLASSIFICATION_RANKING[-1]
+    )
+def to_measurement_method_classification(
+    method: Union[MeasurementMethodClassification, str]
+) -> Optional[MeasurementMethodClassification]:
+    """
+    Coerce the input into a `MeasurementMethodClassification` where possible.
+    Parameters
+    ----------
+    method : MeasurementMethodClassification | str
+        The measurement method as either a `str` or `MeasurementMethodClassification`.
+    Returns
+    -------
+    MeasurementMethodClassification | None
+        The input itself if it is already a `MeasurementMethodClassification`, the matching member if the input is
+        one of the values in `MEASUREMENT_METHOD_CLASSIFICATIONS`, otherwise `None`.
+    """
+    if isinstance(method, MeasurementMethodClassification):
+        return method
+    if method in MEASUREMENT_METHOD_CLASSIFICATIONS:
+        return MeasurementMethodClassification(method)
+    return None
+def group_measurements_by_method_classification(
+    nodes: list[dict]
+) -> dict[MeasurementMethodClassification, list[dict]]:
+    """
+    Group [Measurement](https://www.hestia.earth/schema/Measurement) nodes by their method classification.
+    Nodes whose `@type` is not the Measurement schema type are ignored.
+    The returned dict has the shape:
+    ```
+    {
+        method (MeasurementMethodClassification): nodes (list[dict]),
+        ...methods
+    }
+    ```
+    Parameters
+    ----------
+    nodes : list[dict]
+        A list of Measurement nodes.
+    Returns
+    -------
+    dict[MeasurementMethodClassification, list[dict]]
+        The measurement nodes grouped by method classification.
+    """
+    groups = defaultdict(list)
+    for node in nodes:
+        if node.get("@type") != SchemaType.MEASUREMENT.value:
+            continue
+        # NOTE(review): a missing or unrecognised `methodClassification` is passed straight to the enum constructor,
+        # which presumably raises `ValueError` - confirm callers only pass validated nodes.
+        classification = MeasurementMethodClassification(node.get("methodClassification"))
+        groups[classification].append(node)
+    return dict(groups)

hestia_earth/models/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.61.7'
1	+ VERSION = '0.61.8'

{hestia_earth_models-0.61.7.dist-info → hestia_earth_models-0.61.8.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.61.7
+Version: 0.61.8
 Summary: Hestia's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: Hestia Team

hestia-earth-models 0.61.7__py3-none-any.whl → 0.61.8__py3-none-any.whl

Potentially problematic release.

hestia-earth-models 0.61.7__py3-none-any.whl → 0.61.8__py3-none-any.whl