PyPI - hestia-earth-models - Versions diffs - 0.61.8__py3-none-any.whl → 0.62.1__py3-none-any.whl - Mend

hestia-earth-models 0.61.8__py3-none-any.whl → 0.62.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hestia-earth-models might be problematic. Click here for more details.

Files changed (64) hide show

hestia_earth/models/utils/blank_node.py CHANGED Viewed

@@ -1,11 +1,13 @@
+import calendar
 from calendar import monthrange
 from collections import defaultdict
 from collections.abc import Iterable
-from datetime import datetime
+from datetime import datetime, timedelta
+from uuid import uuid4
 from dateutil.relativedelta import relativedelta
 from enum import Enum
 from functools import reduce
-from statistics import mode, mean
 from typing import (
     Any,
     List,
@@ -24,9 +26,10 @@ from hestia_earth.utils.tools import (
     safe_parse_float,
     non_empty_list
 )
+from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value
 from ..log import debugValues, log_as_table
-from . import is_from_model, _filter_list_term_unit, is_iterable
+from . import is_from_model, _filter_list_term_unit, is_iterable, _omit
 from .constant import Units
 from .property import get_node_property, get_node_property_value
 from .lookup import (
@@ -46,7 +49,9 @@ def group_by_keys(group_keys: list = ['term']):
     return run
-def _module_term_id(term_id: str, module): return getattr(module, 'TERM_ID', term_id).split(',')[0]
+def _module_term_id(term_id: str, module):
+    term_id_str = term_id.split('.')[-1] if '.' in term_id else term_id
+    return getattr(module, 'TERM_ID', term_id_str).split(',')[0]
 def _run_model_required(model: str, term: dict, data: dict):
@@ -342,111 +347,6 @@ def convert_to_carbon(node: dict, model: str, term_id: str, blank_nodes: list, *
     ]) if len(missing_carbon_property) == 0 else None
-class ArrayTreatment(Enum):
-    """
-    Enum representing different treatments for arrays of values.
-    """
-    MEAN = 'mean'
-    MODE = 'mode'
-    SUM = 'sum'
-    FIRST = 'first'
-    LAST = 'last'
-def _should_run_array_treatment(value):
-    return isinstance(value, Iterable) and len(value) > 0
-DEFAULT_ARRAY_TREATMENT = ArrayTreatment.MEAN
-ARRAY_TREATMENT_TO_REDUCER = {
-    ArrayTreatment.MEAN: lambda value: mean(value) if _should_run_array_treatment(value) else 0,
-    ArrayTreatment.MODE: lambda value: mode(value) if _should_run_array_treatment(value) else 0,
-    ArrayTreatment.SUM: lambda value: sum(value) if _should_run_array_treatment(value) else 0,
-    ArrayTreatment.FIRST: lambda value: value[0] if _should_run_array_treatment(value) else 0,
-    ArrayTreatment.LAST: lambda value: value[-1] if _should_run_array_treatment(value) else 0
-}
-"""
-A dictionary mapping ArrayTreatment enums to corresponding reducer functions.
-"""
-def _retrieve_array_treatment(
-    node: dict,
-    is_larger_unit: bool = False,
-    default: ArrayTreatment = ArrayTreatment.FIRST
-) -> ArrayTreatment:
-    """
-    Retrieves the array treatment for a given node.
-    Array treatments are used to reduce an array's list of values into
-    a single value. The array treatment is retrieved from a lookup on
-    the node's term.
-    Parameters
-    ----------
-    node : dict
-        The dictionary representing the node.
-    is_larger_unit : bool, optional
-        Flag indicating whether to use the larger unit lookup, by default `False`.
-    default : ArrayTreatment, optional
-        Default value to return if the lookup fails, by default `ArrayTreatment.FIRST`.
-    Returns
-    -------
-    ArrayTreatment
-        The retrieved array treatment.
-    """
-    ARRAY_TREATMENT_LOOKUPS = [
-        'arrayTreatmentLargerUnitOfTime',
-        'arrayTreatment'
-    ]
-    lookup = ARRAY_TREATMENT_LOOKUPS[0] if is_larger_unit else ARRAY_TREATMENT_LOOKUPS[1]
-    term = node.get('term', {})
-    lookup_value = get_lookup_value(term, lookup, skip_debug=True)
-    return next(
-        (treatment for treatment in ArrayTreatment if treatment.value == lookup_value),
-        default
-    )
-def get_node_value(
-    node: dict,
-    is_larger_unit: bool = False,
-    array_treatment: Optional[ArrayTreatment] = None,
-    default: Any = 0
-) -> Union[float, bool]:
-    """
-    Get the value from the dictionary representing the node,
-    applying optional array treatment if the value is a list.
-    Parameters
-    ----------
-    node : dict
-        The dictionary representing the node.
-    is_larger_unit : bool, optional
-        A flag indicating whether the unit of time is larger, by default `False`.
-    array_treatment : ArrayTreatment, optional
-        An optional override for the treatment to be applied to an array value, if `None` the array treatment in the
-        node's term's lookup is used (which defaults to `FIRST` if no array treatment is specified), by default `None`.
-    Returns
-    -------
-    float | bool
-        The extracted value from the node.
-    """
-    value = node.get("value", 0)
-    reducer = ARRAY_TREATMENT_TO_REDUCER[(
-        array_treatment or _retrieve_array_treatment(node, is_larger_unit=is_larger_unit)
-    )] if isinstance(value, list) and len(value) > 0 else None
-    return reducer(value) if reducer else value if isinstance(value, bool) else value or default
 def _convert_to_set(
     variable: Union[Iterable[Any], Any]
 ) -> set:
@@ -560,7 +460,7 @@ def cumulative_nodes_match(
     """
     values = [
         get_node_value(
-            node, is_larger_unit, array_treatment
+            node, 'value', is_larger_unit, array_treatment
         ) or default_node_value for node in nodes if function(node)
     ]
@@ -995,33 +895,6 @@ def _validate_time_fraction_dict(
     ])
-def _build_update_dict(node: dict, years: list, target_year: int) -> dict:
-    """
-    Build an update dictionary containing values and dates from a node that fall within a given year.
-    This is only required if when `group_nodes_by_year` `mode = GroupNodesByYearMode.DATES`
-    Parameters
-    ----------
-    node : dict
-        The node containing values and dates.
-    year : int
-        The year to be matched.
-    Returns
-    -------
-    dict
-        An update dictionary containing "value" and "dates" keys.
-    """
-    valid_indices = {
-        i for i, y in enumerate(years) if y == target_year
-    }
-    return {
-        "value": [node.get("value")[i] for i in valid_indices],
-        "dates": [node.get("dates")[i] for i in valid_indices]
-    }
 def group_nodes_by_year(
     nodes: list[dict],
     default_node_duration: int = 1,
@@ -1057,7 +930,7 @@ def group_nodes_by_year(
     should_run_node = GROUP_NODES_BY_YEAR_MODE_TO_SHOULD_RUN_NODE_FUNCTION[mode]
     get_node_datetime_range = GROUP_NODES_BY_YEAR_MODE_TO_GET_DATETIME_RANGE_FUNCTION[mode]
-    valid_nodes = [node for node in nodes if should_run_node(node)]
+    valid_nodes = non_empty_list(flatten(split_node_by_dates(node) for node in nodes if should_run_node(node)))
     def group_node(groups: dict, index: int):
         node = valid_nodes[index]
@@ -1066,12 +939,6 @@ def group_nodes_by_year(
             node, default_node_duration=default_node_duration
         )
-        # pre-parse the "dates" field so it doesn't get re-calculated in each iteration of the for-loop
-        years = (
-            [safe_parse_date(datestr).year for datestr in node.get("dates", [])]
-            if mode == GroupNodesByYearMode.DATES else []
-        )
         range_start = node_datetime_range.start.year if node_datetime_range else 0
         range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
@@ -1085,7 +952,6 @@ def group_nodes_by_year(
             is_final_year = _datetime_within_range(node_datetime_range.end, group_datetime_range)
             time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
-            update_dict = _build_update_dict(node, years, year) if mode == GroupNodesByYearMode.DATES else {}
             should_run = (
                 mode == GroupNodesByYearMode.DATES
@@ -1096,7 +962,7 @@ def group_nodes_by_year(
             )
             should_run and groups[year].append(
-                node | time_fraction_dict | update_dict
+                node | time_fraction_dict
             )
         return groups
@@ -1111,6 +977,57 @@ def group_nodes_by_year(
     return dict(sorted(iterated.items())) if sort_result else iterated
+def split_node_by_dates(node: dict) -> list[dict]:
+    """
+    Split a node with an array-like `value` and `dates` with multiple elements into a list of nodes with a single
+    `value` and `dates`. All other array-like node fields (`sd`, `min`, `max`,  and `observations`) will be also be
+    split. Any other fields will be copied with no modifications.
+    All split fields will still be array-like, but will only contain one element. Any array-like fields with a
+    different number of elements to `value` will not be split.
+    This function should only run on nodes with array-like `value` and `dates` (e.g., nodes with `@type` == `Emission`,
+    `Input`,`Measurement`, `Practice` or `Product`).
+    Parameters
+    ----------
+    node : dict
+        A HESTIA blank node with array-like `value` and `dates` (and optional array-like fields `sd`, `min`, `max`, and
+        `observations`).
+    Returns
+    -------
+    list[dict]
+        A list of nodes with single `value` and `dates`.
+    """
+    REQUIRED_KEYS = ["value", "dates"]
+    OPTIONAL_KEYS = ["sd", "min", "max", "observations"]
+    value = node.get("value", [])
+    target_len = len(value) if isinstance(value, list) else -1
+    def should_run_key(key: str) -> bool:
+        item = node.get(key, [])
+        return isinstance(item, list) and len(item) == target_len
+    should_run = all([
+        target_len > 0,
+        all(should_run_key(key) for key in REQUIRED_KEYS)
+    ])
+    valid_keys = REQUIRED_KEYS + [key for key in OPTIONAL_KEYS if should_run_key(key)]
+    def split(result: list[dict], index: int) -> list[dict]:
+        update = {key: [node[key][index]] for key in valid_keys}
+        result.append(node | update)
+        return result
+    return (
+        sorted(reduce(split, range(len(value)), list()), key=lambda node: node.get("dates", []))
+        if should_run else [node]
+    )
 def group_nodes_by_year_and_month(
     nodes: list[dict],
     default_node_duration: int = 1,
@@ -1257,3 +1174,99 @@ def get_inputs_from_properties(input: dict, term_types: Union[TermTermType, List
         } for p in (properties or []) if all([p.get('key'), p.get('value')])
     ]) if input_value > 0 else []
     return filter_list_term_type(inputs, term_types)
+def _get_condensed_nodes(nodes: list) -> tuple[list, bool]:
+    """Only considers nodes which already match on non-date criteria."""
+    CONDENSABLE_UNITS = [Units.BOOLEAN.value, Units.PERCENTAGE_AREA.value]
+    condensed_nodes = []
+    matched_uuids = set()
+    nodes_by_start_date = {_full_date_str(date_str=n["startDate"], is_end=True): n for n in nodes if "startDate" in n}
+    if len(nodes_by_start_date) != len(nodes):
+        return nodes, False
+    for node in nodes:
+        search_date = _offset_date(date_str=node.get("endDate", ""))
+        if node["uuid"] in matched_uuids:
+            continue
+        if (search_date in nodes_by_start_date and nodes_by_start_date[search_date]["uuid"] not in matched_uuids
+                and node.get("term", {}).get("units") in CONDENSABLE_UNITS):
+            new_node = node.copy()
+            new_node["endDate"] = nodes_by_start_date[search_date]["endDate"]
+            condensed_nodes.append(new_node)
+            matched_uuids.add(nodes_by_start_date[search_date]["uuid"])
+        elif node["uuid"] not in matched_uuids:
+            condensed_nodes.append(node)
+    return condensed_nodes, len(matched_uuids) > 0
+def condense_nodes(nodes: list) -> list:
+    grouped_nodes = _group_nodes_by_term_and_value(nodes)
+    condensed_nodes = dict()
+    any_changes_made = False
+    for key, node_group in grouped_nodes.items():
+        condensed_nodes[key] = node_group
+        while len(condensed_nodes[key]) > 1:
+            condensed_nodes[key], changes_made = _get_condensed_nodes(condensed_nodes[key])
+            if not changes_made:
+                break
+            any_changes_made = True
+    if not any_changes_made:
+        return [_omit(values=n, keys=["uuid"]) for n in nodes]
+    return sorted(
+        flatten([_omit(values=n, keys=["uuid"]) for nodes in condensed_nodes.values() for n in nodes]),
+        key=lambda x: x["startDate"]
+    )
+DATE_FORMAT = "%Y-%m-%d"
+def _variable_length_str_to_date(date_str: str, is_end: bool) -> datetime:
+    """Converts to date, adding start or end of year to YYYY strings as indicated by is_end."""
+    return datetime.strptime(_full_date_str(date_str, is_end=is_end), DATE_FORMAT)
+def _full_date_str(date_str: str, is_end: bool) -> str:
+    suffix = ""
+    if len(date_str) == 4:
+        # Format YYYY
+        suffix = "-12-31" if is_end else "-01-01"
+    elif len(date_str) == 7:
+        # Format YYYY-MM
+        suffix = f"-{calendar.monthrange(int(date_str[:4]), int(date_str[5:7]))[1]}" if is_end else "-01"
+    return date_str + suffix
+def _with_full_dates(node: dict) -> dict:
+    output_node = node.copy()
+    if "startDate" in output_node:
+        output_node["startDate"] = _full_date_str(output_node["startDate"], is_end=False)
+    if "endDate" in output_node:
+        output_node["endDate"] = _full_date_str(output_node["endDate"], is_end=True)
+    return output_node
+def _offset_date(date_str: str, days: int = 1, is_end: bool = True) -> str:
+    return (
+        _variable_length_str_to_date(date_str=date_str, is_end=is_end) + timedelta(days=days)
+    ).strftime(DATE_FORMAT)
+def _group_nodes_by_term_and_value(nodes: list) -> dict:
+    grouped_nodes = defaultdict(list)
+    for node in nodes:
+        term_id = node.get("term", {}).get("@id", "")
+        value = "-".join([str(v) for v in node.get("value")]) if isinstance(node.get("value"), list) \
+            else node.get("value")
+        node["uuid"] = uuid4()
+        grouped_nodes[(term_id, value)].append(_with_full_dates(node))
+    return grouped_nodes

hestia_earth/models/utils/constant.py CHANGED Viewed

@@ -3,6 +3,7 @@ from hestia_earth.utils.tools import list_sum
 class Units(Enum):
+    BOOLEAN = 'boolean'
     HEAD = 'head'
     NUMBER = 'number'
     KG = 'kg'
@@ -31,6 +32,7 @@ class Units(Enum):
     KG_COLD_CARCASS_WEIGHT = 'kg cold carcass weight'
     KG_COLD_DRESSED_CARCASS_WEIGHT = 'kg cold dressed carcass weight'
     KG_READY_TO_COOK_WEIGHT = 'kg ready-to-cook weight'
+    PERCENTAGE_AREA = '% area'
     TO_C = '-C'
     TO_N = '-N'

hestia_earth/models/utils/lookup.py CHANGED Viewed

@@ -76,6 +76,17 @@ _ALLOW_ALL = 'all'
 def _is_site(site: dict): return site.get('@type', site.get('type')) == SchemaType.SITE.value
+def _get_sites(node: dict):
+    site = node.get('site', node.get('cycle', {}).get('site'))
+    other_sites = node.get('otherSites', node.get('cycle', {}).get('otherSites', []))
+    return non_empty_list([site] + other_sites)
+def _get_site_types(node: dict):
+    sites = [node] if _is_site(node) else _get_sites(node)
+    return non_empty_list([site.get('siteType') for site in sites])
 def _model_lookup_values(model: str, term: dict, restriction: str):
     lookup = download_lookup(f"{term.get('termType')}-model-{restriction}.csv")
     values = get_table_value(lookup, 'termid', term.get('@id'), column_name(model))
@@ -83,10 +94,11 @@ def _model_lookup_values(model: str, term: dict, restriction: str):
 def is_model_siteType_allowed(model: str, term: dict, data: dict):
-    site = data if _is_site(data) else data.get('site', data.get('cycle', {}).get('site')) or {}
-    site_type = site.get('siteType')
+    site_types = _get_site_types(data)
     allowed_values = _model_lookup_values(model, term, 'siteTypesAllowed')
-    return True if _ALLOW_ALL in allowed_values or not site_type else site_type in allowed_values
+    return True if _ALLOW_ALL in allowed_values or not site_types else any([
+        (site_type in allowed_values) for site_type in site_types
+    ])
 def _lookup_values(term: dict, column: str):
@@ -96,10 +108,11 @@ def _lookup_values(term: dict, column: str):
 def is_siteType_allowed(data: dict, term: dict):
-    site = data if _is_site(data) else data.get('site', data.get('cycle', {}).get('site')) or {}
-    site_type = site.get('siteType')
+    site_types = _get_site_types(data)
     allowed_values = _lookup_values(term, 'siteTypesAllowed')
-    return True if _ALLOW_ALL in allowed_values or not site_type else site_type in allowed_values
+    return True if _ALLOW_ALL in allowed_values or not site_types else any([
+        (site_type in allowed_values) for site_type in site_types
+    ])
 def is_product_termType_allowed(data: dict, term: dict):

hestia_earth/models/utils/source.py CHANGED Viewed

@@ -18,7 +18,7 @@ def _find_source(biblio_title: str = None):
 def get_source(node: dict, biblio_title: str = None):
-    source = cached_value(node, CACHE_SOURCES_KEY, {}).get(biblio_title, _find_source(biblio_title))
+    source = cached_value(node, CACHE_SOURCES_KEY, {}).get(biblio_title) or _find_source(biblio_title)
     return {'source': source} if source else {}

hestia_earth/models/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.61.8'
1	+ VERSION = '0.62.1'

{hestia_earth_models-0.61.8.dist-info → hestia_earth_models-0.62.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.61.8
+Version: 0.62.1
 Summary: Hestia's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: Hestia Team
@@ -11,8 +11,8 @@ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
 Classifier: Programming Language :: Python :: 3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: hestia-earth.schema ==28.*
-Requires-Dist: hestia-earth.utils >=0.13.0
+Requires-Dist: hestia-earth.schema ==29.*
+Requires-Dist: hestia-earth.utils >=0.13.2
 Requires-Dist: python-dateutil >=2.8.1
 Requires-Dist: CurrencyConverter ==0.16.8
 Requires-Dist: haversine >=2.7.0

hestia-earth-models 0.61.8__py3-none-any.whl → 0.62.1__py3-none-any.whl

Potentially problematic release.

hestia-earth-models 0.61.8__py3-none-any.whl → 0.62.1__py3-none-any.whl