PyPI - hestia-earth-models - Versions diffs - 0.58.0__py3-none-any.whl → 0.59.0__py3-none-any.whl - Mend

hestia-earth-models 0.58.0__py3-none-any.whl → 0.59.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hestia-earth-models might be problematic. Click here for more details.

Files changed (34) hide show

hestia_earth/models/utils/blank_node.py CHANGED Viewed

@@ -8,6 +8,7 @@ from functools import reduce
 from statistics import mode, mean
 from typing import (
     Any,
+    List,
     Callable,
     NamedTuple,
     Optional,
@@ -23,7 +24,7 @@ from hestia_earth.utils.tools import (
 )
 from ..log import debugValues, log_as_table
-from . import _filter_list_term_unit
+from . import is_from_model, _filter_list_term_unit
 from .constant import Units
 from .property import get_node_property, get_node_property_value
 from .lookup import (
@@ -127,6 +128,16 @@ def find_terms_value(nodes: list, term_id: str, default: Union[int, None] = 0):
     return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)), default)
+def has_gap_filled_by_ids(nodes: list, term_ids: List[str]):
+    nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
+    return any([is_from_model(n) for n in nodes])
+def has_original_by_ids(nodes: list, term_ids: List[str]):
+    nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
+    return any([not is_from_model(n) for n in nodes])
 def get_total_value(nodes: list):
     """
     Get the total `value` of a list of Blank Nodes.
@@ -941,27 +952,27 @@ def _build_time_fraction_dict(
     Returns
     -------
     dict
-        A dictionary containing "fraction_of_year" and "fraction_of_node_duration".
+        A dictionary containing "fraction_of_group_duration" and "fraction_of_node_duration".
     """
-    year_duration = _datetime_range_duration(group_datetime_range, add_second=True)
+    group_duration = _datetime_range_duration(group_datetime_range, add_second=True)
     node_duration = _datetime_range_duration(node_datetime_range, add_second=True)
     intersection_duration = _calc_datetime_range_intersection_duration(
         node_datetime_range, group_datetime_range, add_second=True
     )
-    fraction_of_year = intersection_duration / year_duration
+    fraction_of_group_duration = intersection_duration / group_duration
     fraction_of_node_duration = intersection_duration / node_duration
     return {
-        "fraction_of_year": fraction_of_year,
+        "fraction_of_group_duration": fraction_of_group_duration,
         "fraction_of_node_duration": fraction_of_node_duration
     }
 def _validate_time_fraction_dict(
     time_fraction_dict: dict,
-    is_final_year: bool
+    is_final_group: bool
 ) -> bool:
     """
     Return `True` if the the node intersections with a year group by
@@ -973,30 +984,19 @@ def _validate_time_fraction_dict(
     be counted in the year group if the majority of that node takes place in
     that year.
     """
-    FRACTION_OF_YEAR_THRESHOLD = 0.3
+    FRACTION_OF_GROUP_DURATION_THRESHOLD = 0.3
     FRACTION_OF_NODE_DURATION_THRESHOLD = 0.5
     return any([
-        time_fraction_dict["fraction_of_year"] > FRACTION_OF_YEAR_THRESHOLD,
+        time_fraction_dict["fraction_of_group_duration"] > FRACTION_OF_GROUP_DURATION_THRESHOLD,
         time_fraction_dict["fraction_of_node_duration"] > FRACTION_OF_NODE_DURATION_THRESHOLD,
-        is_final_year and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
+        is_final_group and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
     ])
-def _datestr_within_range(datestr: str, group_datetime_range: DatetimeRange) -> bool:
-    """
-    Validate if the date represented by the date string falls within the specified datetime range.
+def _build_update_dict(node: dict, years: list, target_year: int) -> dict:
     """
-    return _datetime_within_range(
-        safe_parse_date(_gapfill_datestr(datestr)),
-        group_datetime_range
-    )
-def _build_update_dict(node: dict, group_datetime_range: DatetimeRange) -> dict:
-    """
-    Build an update dictionary containing values and dates from a node that fall within a given
-    datetime range.
+    Build an update dictionary containing values and dates from a node that fall within a given year.
     This is only required if when `group_nodes_by_year` `mode = GroupNodesByYearMode.DATES`
@@ -1004,23 +1004,20 @@ def _build_update_dict(node: dict, group_datetime_range: DatetimeRange) -> dict:
     ----------
     node : dict
         The node containing values and dates.
-    group_datetime_range : DatetimeRange
-        The datetime range to filter values and dates.
+    year : int
+        The year to be matched.
     Returns
     -------
     dict
         An update dictionary containing "value" and "dates" keys.
     """
+    valid_indices = {
+        i for i, y in enumerate(years) if y == target_year
+    }
     return {
-        "value": [
-            val for val, datestr in zip(node.get("value"), node.get("dates"))
-            if _datestr_within_range(datestr, group_datetime_range)
-        ],
-        "dates": [
-            datestr for datestr in node.get("dates")
-            if _datestr_within_range(datestr, group_datetime_range)
-        ]
+        "value": [node.get("value")[i] for i in valid_indices],
+        "dates": [node.get("dates")[i] for i in valid_indices]
     }
@@ -1068,6 +1065,12 @@ def group_nodes_by_year(
             node, default_node_duration=default_node_duration
         )
+        # pre-parse the "dates" field so it doesn't get re-calculated in each iteration of the for-loop
+        years = (
+            [safe_parse_date(datestr).year for datestr in node.get("dates", [])]
+            if mode == GroupNodesByYearMode.DATES else []
+        )
         range_start = node_datetime_range.start.year if node_datetime_range else 0
         range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
@@ -1081,7 +1084,7 @@ def group_nodes_by_year(
             is_final_year = _datetime_within_range(node_datetime_range.end, group_datetime_range)
             time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
-            update_dict = _build_update_dict(node, group_datetime_range) if mode == GroupNodesByYearMode.DATES else {}
+            update_dict = _build_update_dict(node, years, year) if mode == GroupNodesByYearMode.DATES else {}
             should_run = (
                 mode == GroupNodesByYearMode.DATES
@@ -1104,4 +1107,124 @@ def group_nodes_by_year(
         for year, group in grouped.items()
     }
-    return dict(sorted(iterated.items())) if sort_result else dict(iterated)
+    return dict(sorted(iterated.items())) if sort_result else iterated
+def group_nodes_by_year_and_month(
+    nodes: list[dict],
+    default_node_duration: int = 1,
+    sort_result: bool = True,
+    inner_key: Union[Any, None] = None
+) -> dict[int, list[dict]]:
+    """
+    Group nodes by year based on either their "startDate" and "endDate" fields. Incomplete date strings are gap-filled
+    automatically using `_gapfill_datestr` function.
+    Returns a dict in the shape:
+    ```
+    {
+        year (int): {
+            month (int): nodes (list[dict])  # for each month 1 - 12
+        }
+    }
+    ```
+    Parameters
+    ----------
+    nodes : list[dict]
+        A list of nodes with start and end date information.
+    default_node_duration : int, optional
+        Default duration of a node years if start date is not available, by default 1.
+    sort_result : bool, optional
+        Flag to sort the result by year, by default True.
+    inner_key: Any | None
+        An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
+        dictionaries together), default value: `None`.
+    Returns
+    -------
+    dict[int, list[dict]]
+        A dictionary where keys are years and values are lists of nodes.
+    """
+    valid_nodes = [node for node in nodes if _should_run_node_by_end_date(node)]
+    def group_node(groups: dict, index: int):
+        node = valid_nodes[index]
+        node_datetime_range = _get_node_datetime_range_from_start_and_end_date(
+            node, default_node_duration=default_node_duration
+        )
+        range_start = node_datetime_range.start.year if node_datetime_range else 0
+        range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
+        for year in range(range_start, range_end):
+            for month in range(1, 13):
+                group_datetime_range = DatetimeRange(
+                    start=safe_parse_date(_gapfill_datestr(f"{year}-{month:02}", DatestrGapfillMode.START)),
+                    end=safe_parse_date(_gapfill_datestr(f"{year}-{month}", DatestrGapfillMode.END))
+                )
+                is_final_month = _datetime_within_range(node_datetime_range.end, group_datetime_range)
+                time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
+                should_run = _validate_time_fraction_dict(time_fraction_dict, is_final_month)
+                should_run and groups[year][month].append(node)
+        return groups
+    grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(lambda: defaultdict(list)))
+    iterated = {
+        year: {inner_key: dict(group)} if inner_key else dict(group)
+        for year, group in grouped.items()
+    }
+    return dict(sorted(iterated.items())) if sort_result else iterated
+# --- Group nodes by last date ---
+def _get_last_date(datestrs: list[str]) -> Optional[str]:
+    """
+    Reduce a datestrs down to a single datestr by selecting the last one.
+    Parameters
+    ----------
+    datestrs : list
+        A list of datestrings, e.g. the value of a node's `dates` field.
+    Returns
+    -------
+    str | None
+        Returns the latest datestr or `None` if no valid datestr in list.
+    """
+    return sorted(datestrs)[-1] if len(datestrs) > 0 else None
+def group_nodes_by_last_date(nodes: list) -> dict[str, list[dict]]:
+    """
+    Group a list of nodes by the last date of their `dates` field. Nodes with no `dates` field will be sorted into
+    the `no-dates` group.
+    Parameters
+    ----------
+    nodes : list[dict]
+        A list of Hestia format nodes.
+    Return
+    ------
+    dict
+        A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
+    """
+    DEFAULT_KEY = 'no-dates'
+    def group_by(group: dict, node: dict):
+        dates = node.get('dates', [])
+        key = _get_last_date(dates) or DEFAULT_KEY
+        return group | {key: group.get(key, []) + [node]}
+    return reduce(group_by, nodes, {})

hestia_earth/models/utils/cycle.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from hestia_earth.schema import CycleFunctionalUnit, SiteSiteType, TermTermType
 from hestia_earth.utils.model import filter_list_term_type, find_term_match, find_primary_product
-from hestia_earth.utils.tools import list_sum, safe_parse_float, safe_parse_date
+from hestia_earth.utils.tools import flatten, list_sum, safe_parse_float, safe_parse_date
 from ..log import logRequirements, debugValues
 from .lookup import factor_value
@@ -370,7 +370,7 @@ def is_organic(cycle: dict):
 def is_irrigated(cycle: dict):
     """
-    Check if the `Cycle` is irrigated, i.e. if it contains an irrigated `Practice`.
+    Check if the `Cycle` is irrigated, i.e. if it contains an irrigated `Practice` with a value above `0`.
     Parameters
     ----------
@@ -382,7 +382,10 @@ def is_irrigated(cycle: dict):
     bool
         `True` if the `Cycle` is irrigated, `False` otherwise.
     """
-    return list_sum(find_term_match(cycle.get('practices', []), 'irrigated').get('value', [])) > 0
+    irrigated_practices = [
+        p for p in cycle.get('practices', []) if p.get('term', {}).get('@id', '').startswith('irrigated')
+    ]
+    return list_sum(flatten([p.get('value', []) for p in irrigated_practices])) > 0
 def cycle_end_year(cycle: dict):

hestia_earth/models/utils/measurement.py CHANGED Viewed

@@ -17,7 +17,7 @@ from .term import get_lookup_value
 # TODO: verify those values
 MAX_DEPTH = 1000
 OLDEST_DATE = '1800'
+SOIL_TEXTURE_IDS = ['sandContent', 'siltContent', 'clayContent']
 MEASUREMENT_REDUCE = {
     'mean': lambda value: mean(value),
     'mode': lambda value: mode(value),

hestia_earth/models/utils/term.py CHANGED Viewed

@@ -544,7 +544,7 @@ def get_residue_removed_or_burnt_terms():
     return list(map(lambda n: n["@id"], terms))
-def get_rice_plant_upland_terms():
+def get_upland_rice_land_cover_terms():
     """
     Find all `landCover` terms related to upland rice the Glossary.
@@ -566,6 +566,28 @@ def get_rice_plant_upland_terms():
     return list(map(lambda n: n["@id"], terms))
+def get_upland_rice_crop_terms():
+    """
+    Find all `crop` terms related to upland rice the Glossary.
+    Returns
+    -------
+    list
+        List of matching term `@id` as `str`.
+    """
+    terms = search({
+        "bool": {
+            "must": [
+                {"match": {"@type": SchemaType.TERM.value}},
+                {"match": {"termType.keyword": TermTermType.CROP.value}},
+                {"match_phrase": {"name": "rice"}},
+                {"match": {"name": "upland"}}
+            ],
+        }
+    }, limit=LIMIT)
+    return list(map(lambda n: n["@id"], terms))
 def get_pasture_system_terms():
     """
     Find all `system` terms with the name `pasture`:
@@ -581,3 +603,26 @@ def get_pasture_system_terms():
         'name': 'pasture'
     }, limit=LIMIT)
     return list(map(lambda n: n["@id"], terms))
+def get_long_fallow_land_cover_terms():
+    """
+    Find all `landCover` terms with the name `long fallow`:
+    https://hestia.earth/glossary?termType=landCover&query=long%fallow
+    Returns
+    -------
+    list
+        List of matching term `@id` as `str`.
+    """
+    terms = search({
+        "bool": {
+            "must": [
+                {"match": {"@type": SchemaType.TERM.value}},
+                {"match": {"termType.keyword": TermTermType.LANDCOVER.value}},
+                {"match_phrase_prefix": {"name": "long"}},
+                {"match": {"name": "fallow"}}
+            ],
+        }
+    }, limit=LIMIT)
+    return list(map(lambda n: n["@id"], terms))

hestia_earth/models/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.58.0'
1	+ VERSION = '0.59.0'

{hestia_earth_models-0.58.0.dist-info → hestia_earth_models-0.59.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.58.0
+Version: 0.59.0
 Summary: Hestia's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: Hestia Team
@@ -55,19 +55,15 @@ run('no3ToGroundwaterSoilFlux', cycle_data)
 ### Using Spatial Models
-We have models that can gap-fill geographical information on a `Site`.
-If you want to use thse models:
+We have models that can gap-fill geographical information on a `Site`. If you want to use these models:
 1. Install the library: `pip install hestia_earth.earth_engine`
 2. Follow the [Getting Started instructions](https://gitlab.com/hestia-earth/hestia-earth-engine#getting-started).
-### Using Ecoinvent Model
+### Using the ecoinventV3 model
 ecoinvent is a consistent, transparent, and well validated life cycle inventory database.
 We use ecoinvent data to ascertain the environmental impacts of activities that occur outside of our system boundary, for example data on the environmental impacts of extracting oil and producing diesel, or the impacts of manufacturing plastics.
-To include these data in your environmental impact assessments using Hestia, you must own a suitable [ecoinvent license](https://ecoinvent.org/offerings/licences/).
-Please contact us at community@hestia.earth for instructions to download the required file to run the model.
-Once downloaded, copy the file in the _hestia_earth/models/data/ecoinventV3_ folder.
-Thank you!
+The `ecoinventV3` model requires a valid [license](https://ecoinvent.org/offerings/licences/) to run. We are currently working on a way to enable users of this code with a valid ecoinvent licence to run these models themselves, but for now, these models are only available on the public platform.

hestia-earth-models 0.58.0__py3-none-any.whl → 0.59.0__py3-none-any.whl

Potentially problematic release.

hestia-earth-models 0.58.0__py3-none-any.whl → 0.59.0__py3-none-any.whl