hestia-earth-models 0.65.11__py3-none-any.whl → 0.67.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. hestia_earth/models/cache_sites.py +7 -9
  2. hestia_earth/models/cml2001Baseline/abioticResourceDepletionFossilFuels.py +23 -54
  3. hestia_earth/models/cml2001Baseline/resourceUseEnergyDepletionDuringCycle.py +152 -0
  4. hestia_earth/models/cml2001Baseline/resourceUseEnergyDepletionInputsProduction.py +40 -0
  5. hestia_earth/models/cml2001Baseline/resourceUseMineralsAndMetalsDuringCycle.py +80 -0
  6. hestia_earth/models/cml2001Baseline/resourceUseMineralsAndMetalsInputsProduction.py +40 -0
  7. hestia_earth/models/config/Cycle.json +34 -16
  8. hestia_earth/models/config/ImpactAssessment.json +1867 -1832
  9. hestia_earth/models/config/Site.json +4 -1
  10. hestia_earth/models/cycle/completeness/freshForage.py +10 -2
  11. hestia_earth/models/cycle/cropResidueManagement.py +3 -1
  12. hestia_earth/models/cycle/input/hestiaAggregatedData.py +13 -10
  13. hestia_earth/models/ecoinventV3/__init__.py +2 -1
  14. hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/__init__.py +4 -3
  15. hestia_earth/models/environmentalFootprintV3_1/environmentalFootprintSingleOverallScore.py +135 -0
  16. hestia_earth/models/environmentalFootprintV3_1/marineEutrophicationPotential.py +36 -0
  17. hestia_earth/models/environmentalFootprintV3_1/scarcityWeightedWaterUse.py +40 -0
  18. hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexLandTransformation.py +17 -6
  19. hestia_earth/models/geospatialDatabase/{aware.py → awareWaterBasinId.py} +1 -1
  20. hestia_earth/models/hestia/landCover.py +42 -34
  21. hestia_earth/models/hestia/residueRemoved.py +80 -0
  22. hestia_earth/models/hestia/resourceUse_utils.py +43 -29
  23. hestia_earth/models/impact_assessment/product/value.py +1 -1
  24. hestia_earth/models/ipcc2019/aboveGroundBiomass.py +34 -13
  25. hestia_earth/models/ipcc2019/belowGroundBiomass.py +33 -12
  26. hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +17 -8
  27. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +7 -4
  28. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1_utils.py +2 -1
  29. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_2_utils.py +29 -18
  30. hestia_earth/models/ipcc2019/pastureGrass_utils.py +8 -1
  31. hestia_earth/models/log.py +1 -1
  32. hestia_earth/models/mocking/search-results.json +872 -872
  33. hestia_earth/models/site/defaultMethodClassification.py +9 -2
  34. hestia_earth/models/site/defaultMethodClassificationDescription.py +4 -2
  35. hestia_earth/models/site/management.py +48 -30
  36. hestia_earth/models/site/pre_checks/cache_geospatialDatabase.py +19 -14
  37. hestia_earth/models/utils/__init__.py +6 -0
  38. hestia_earth/models/utils/aggregated.py +13 -10
  39. hestia_earth/models/utils/array_builders.py +4 -3
  40. hestia_earth/models/utils/blank_node.py +23 -13
  41. hestia_earth/models/utils/lookup.py +4 -2
  42. hestia_earth/models/utils/property.py +5 -2
  43. hestia_earth/models/version.py +1 -1
  44. hestia_earth/orchestrator/log.py +11 -0
  45. hestia_earth/orchestrator/models/__init__.py +8 -3
  46. hestia_earth/orchestrator/strategies/merge/merge_list.py +17 -6
  47. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/METADATA +1 -1
  48. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/RECORD +86 -69
  49. tests/models/cml2001Baseline/test_abioticResourceDepletionFossilFuels.py +51 -87
  50. tests/models/cml2001Baseline/test_resourceUseEnergyDepletionDuringCycle.py +103 -0
  51. tests/models/cml2001Baseline/test_resourceUseEnergyDepletionInputsProduction.py +23 -0
  52. tests/models/cml2001Baseline/test_resourceUseMineralsAndMetalsDuringCycle.py +58 -0
  53. tests/models/cml2001Baseline/test_resourceUseMineralsAndMetalsInputsProduction.py +23 -0
  54. tests/models/environmentalFootprintV3_1/test_environmentalFootprintSingleOverallScore.py +93 -0
  55. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_freshwaterEcotoxicityPotentialCtue.py +6 -5
  56. tests/models/environmentalFootprintV3_1/test_marineEutrophicationPotential.py +27 -0
  57. tests/models/environmentalFootprintV3_1/test_scarcityWeightedWaterUse.py +32 -0
  58. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexLandOccupation.py +4 -3
  59. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexLandTransformation.py +8 -22
  60. tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/test_soilQualityIndexTotalLandUseEffects.py +4 -4
  61. tests/models/faostat2018/product/test_price.py +1 -1
  62. tests/models/geospatialDatabase/{test_aware.py → test_awareWaterBasinId.py} +1 -1
  63. tests/models/hestia/test_landCover.py +2 -1
  64. tests/models/hestia/test_landTransformation20YearAverageDuringCycle.py +2 -1
  65. tests/models/hestia/test_residueRemoved.py +20 -0
  66. tests/models/impact_assessment/test_emissions.py +0 -1
  67. tests/models/ipcc2019/test_aboveGroundBiomass.py +3 -1
  68. tests/models/ipcc2019/test_belowGroundBiomass.py +4 -2
  69. tests/models/ipcc2019/test_organicCarbonPerHa.py +94 -1
  70. tests/models/site/pre_checks/test_cache_geospatialDatabase.py +22 -0
  71. tests/models/site/test_defaultMethodClassification.py +6 -0
  72. tests/models/site/test_defaultMethodClassificationDescription.py +6 -0
  73. tests/models/site/test_management.py +4 -4
  74. tests/models/test_cache_sites.py +2 -2
  75. tests/models/test_config.py +3 -3
  76. tests/models/test_ecoinventV3.py +0 -1
  77. tests/models/utils/test_array_builders.py +2 -2
  78. tests/orchestrator/strategies/merge/test_merge_list.py +11 -1
  79. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/freshwaterEcotoxicityPotentialCtue.py +0 -0
  80. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexLandOccupation.py +0 -0
  81. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/soilQualityIndexTotalLandUseEffects.py +0 -0
  82. /hestia_earth/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/utils.py +0 -0
  83. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/LICENSE +0 -0
  84. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/WHEEL +0 -0
  85. {hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.67.0.dist-info}/top_level.txt +0 -0
  86. /tests/models/{environmentalFootprintV3 → environmentalFootprintV3_1}/__init__.py +0 -0
@@ -5,6 +5,8 @@ When gap-filling `management` node on Site, the
 `defaultMethodClassification` and `defaultMethodClassificationDescription` fields become required.
 This model will use the first value in the `management` node.
 """
+from hestia_earth.schema import SiteDefaultMethodClassification
+
 from hestia_earth.models.log import logRequirements, logShouldRun
 from . import MODEL
 
@@ -20,12 +22,17 @@ MODEL_KEY = 'defaultMethodClassification'
 
 
 def _should_run(site: dict):
-    methodClassification = next((n.get('methodClassification') for n in site.get('management', [])), None)
+    has_management = bool(site.get('management', []))
+    methodClassification = next(
+        (n.get('methodClassification') for n in site.get('management', [])),
+        None
+    ) or SiteDefaultMethodClassification.MODELLED.value
 
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassification=methodClassification)
 
-    should_run = all([methodClassification])
+    should_run = all([has_management, methodClassification])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassification
 
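
With this change, a Site that has `management` nodes but no explicit `methodClassification` is no longer skipped: the value now falls back to the schema's `MODELLED` enum, and the model only requires that some `management` data exists. A minimal sketch of the new fallback, using hypothetical site data rather than calling the model itself:

    from hestia_earth.schema import SiteDefaultMethodClassification

    # hypothetical Site: has management nodes but none declare a methodClassification
    site = {'management': [{'term': {'@id': 'croplandTermId'}}]}

    methodClassification = next(
        (n.get('methodClassification') for n in site.get('management', [])),
        None
    ) or SiteDefaultMethodClassification.MODELLED.value
    # methodClassification now holds the MODELLED enum value instead of None
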
@@ -20,16 +20,18 @@ MODEL_KEY = 'defaultMethodClassificationDescription'
 
 
 def _should_run(site: dict):
+    has_management = bool(site.get('management', []))
     methodClassificationDescription = next((
         n.get('methodClassificationDescription')
         for n in site.get('management', [])
         if n.get('methodClassification')
-    ), None)
+    ), None) or 'Data calculated by merging real land use histories and modelled land use histories for each Site.'
 
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassificationDescription=methodClassificationDescription)
 
-    should_run = all([methodClassificationDescription])
+    should_run = all([has_management, methodClassificationDescription])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassificationDescription
 
@@ -10,7 +10,8 @@ tillage, cropResidueManagement and landUseManagement.
 All values are copied from the source node, except for crop and forage terms in which case the dates are copied from the
 cycle.
 
-Where `startDate` is missing from landCover products, gap-filling is attempted using `endDate` - `maximumCycleDuration`.
+Where `startDate` is missing from landCover products, gap-filling is attempted using
+`endDate` - `cycleDuration` (or `maximumCycleDuration` lookup).
 This is the `endDate` of the `landCover` product.
 This ensures no overlapping date ranges.
 If both `endDate` and `startDate` are missing from the product, these will be gap-filled from the `Cycle`.
@@ -18,9 +19,10 @@ If both `endDate` and `startDate` are missing from the product, these will be ga
 When nodes are chronologically consecutive with "% area" or "boolean" units and the same term and value, they are
 condensed into a single node to aid readability.
 """
+from typing import List
 from datetime import timedelta, datetime
 from functools import reduce
-from hestia_earth.schema import TermTermType, SiteSiteType
+from hestia_earth.schema import SchemaType, TermTermType, SiteSiteType, COMPLETENESS_MAPPING
 from hestia_earth.utils.lookup import column_name, get_table_value, download_lookup
 from hestia_earth.utils.model import filter_list_term_type
 from hestia_earth.utils.tools import safe_parse_float, flatten
@@ -42,7 +44,6 @@ REQUIREMENTS = {
         "related": {
             "Cycle": [{
                 "@type": "Cycle",
-                "startDate": "",
                 "endDate": "",
                 "products": [
                     {
@@ -71,7 +72,11 @@ REQUIREMENTS = {
                             "soilAmendment"
                         ]
                     }
-                ]
+                ],
+                "optional": {
+                    "startDate": "",
+                    "cycleDuration": ""
+                }
             }]
         }
     }
@@ -98,6 +103,14 @@ LOOKUPS = {
 }
 MODEL_KEY = 'management'
 
+_PRACTICES_TERM_TYPES = [
+    TermTermType.WATERREGIME,
+    TermTermType.TILLAGE,
+    TermTermType.CROPRESIDUEMANAGEMENT,
+    TermTermType.LANDUSEMANAGEMENT,
+    TermTermType.SYSTEM
+]
+_PRACTICES_COMPLETENESS_MAPPING = COMPLETENESS_MAPPING.get(SchemaType.PRACTICE.value, {})
 _ANIMAL_MANURE_USED_TERM_ID = "animalManureUsed"
 _INORGANIC_NITROGEN_FERTILISER_USED_TERM_ID = "inorganicNitrogenFertiliserUsed"
 _ORGANIC_FERTILISER_USED_TERM_ID = "organicFertiliserUsed"
@@ -147,11 +160,13 @@ def management(data: dict):
     return node
 
 
-def _get_maximum_cycle_duration(land_cover_id: str):
-    lookup = download_lookup("crop.csv")
-    return safe_parse_float(
-        get_table_value(lookup, column_name('landCoverTermId'), land_cover_id, column_name('maximumCycleDuration'))
-    )
+def _get_cycle_duration(cycle: dict, land_cover_id: str):
+    return cycle.get('cycleDuration') or safe_parse_float(get_table_value(
+        download_lookup("crop.csv"),
+        column_name('landCoverTermId'),
+        land_cover_id,
+        column_name('maximumCycleDuration')
+    ))
 
 
 def _gap_filled_date_only_str(date_str: str, mode: str = DatestrGapfillMode.END) -> str:
@@ -166,16 +181,16 @@ def _gap_filled_date_obj(date_str: str, mode: str = DatestrGapfillMode.END) -> d
 
 
 def _gap_filled_start_date(land_cover_id: str, end_date: str, cycle: dict) -> dict:
-    """If possible, gap-fill the startDate based on the endDate - maximumCycleDuration"""
-    maximum_cycle_duration = _get_maximum_cycle_duration(land_cover_id)
+    """If possible, gap-fill the startDate based on the endDate - cycleDuration"""
+    cycle_duration = _get_cycle_duration(cycle, land_cover_id)
     return {
         "startDate": max(
-            _gap_filled_date_obj(end_date) - timedelta(days=maximum_cycle_duration)
-            if maximum_cycle_duration else datetime.fromtimestamp(0),
+            _gap_filled_date_obj(end_date) - timedelta(days=cycle_duration)
+            if cycle_duration else datetime.fromtimestamp(0),
            _gap_filled_date_obj(cycle.get("startDate"), mode=DatestrGapfillMode.START)
            if cycle.get("startDate") else datetime.fromtimestamp(0)
        )
-    } if any([maximum_cycle_duration, cycle.get("startDate")]) else {}
 
 
 def _include_with_date_gap_fill(value: dict, keys: list) -> dict:
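
The gap-filled `startDate` now prefers the Cycle's own `cycleDuration` and only falls back to the `maximumCycleDuration` lookup, and the result is still clamped so it never precedes the Cycle `startDate`. A rough illustration of the date arithmetic with invented values, using plain `datetime` rather than the module's helpers:

    from datetime import datetime, timedelta

    end_date = datetime(2020, 12, 31)
    cycle_duration = 120                 # days, e.g. taken from cycle['cycleDuration']
    cycle_start = datetime(2020, 10, 1)

    # gap-filled startDate = the later of (endDate - duration) and the Cycle startDate
    start_date = max(end_date - timedelta(days=cycle_duration), cycle_start)
    print(start_date.date())             # 2020-10-01, clamped to the Cycle startDate
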
@@ -217,12 +232,21 @@ def _copy_item_if_exists(source: dict, keys: list[str] = None, dest: dict = None
     return reduce(lambda p, c: p | ({c: source[c]} if source.get(c) else {}), keys or [], dest or {})
 
 
-def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
+def _get_relevant_items(cycle: dict, item_name: str, term_types: List[TermTermType], completeness_mapping: dict = {}):
     """
     Get items from the list of cycles with any of the relevant terms.
     Also adds dates from Cycle.
     """
-    items = [
+    # filter term types that are no complete
+    complete_term_types = term_types if not completeness_mapping else [
+        term_type for term_type in term_types
+        if any([
+            not completeness_mapping.get(term_type.value),
+            cycle.get('completeness', {}).get(completeness_mapping.get(term_type.value), False)
+        ])
+    ]
+    blank_nodes = filter_list_term_type(cycle.get(item_name, []), complete_term_types)
+    return [
         _include_with_date_gap_fill(cycle, ["startDate", "endDate"]) |
         _include(
             _gap_filled_start_date(
@@ -233,20 +257,19 @@ def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
                 "startDate"
             ) |
             item
-        for item in filter_list_term_type(cycle.get(item_name, []), relevant_terms)
+        for item in blank_nodes
     ]
-    return items
 
 
 def _process_rule(node: dict, term: dict) -> list:
-    relevant_terms = []
+    term_types = []
     for column, condition, new_term in _INPUT_RULES[term.get('termType')]:
         lookup_result = get_lookup_value(term, LOOKUPS[column], model=MODEL, term=term.get('@id'), model_key=MODEL_KEY)
 
         if condition(lookup_result):
-            relevant_terms.append(node | {'id': new_term})
+            term_types.append(node | {'id': new_term})
 
-    return relevant_terms
+    return term_types
 
 
 def _run_from_inputs(site: dict, cycle: dict) -> list:
@@ -307,7 +330,7 @@ def _run_from_landCover(cycle: dict, crop_forage_products: list):
         )) for product in _get_relevant_items(
             cycle=cycle,
             item_name="products",
-            relevant_terms=[TermTermType.LANDCOVER]
+            term_types=[TermTermType.LANDCOVER]
         )
     ]
     return land_cover_products + _run_products(
@@ -337,7 +360,7 @@ def _run_from_crop_forage(cycle: dict, site: dict):
     products = _get_relevant_items(
         cycle=cycle,
         item_name="products",
-        relevant_terms=[TermTermType.CROP, TermTermType.FORAGE]
+        term_types=[TermTermType.CROP, TermTermType.FORAGE]
     ) if site.get("siteType", "") == SiteSiteType.CROPLAND.value else []
     # only take products with a matching landCover term
     products = [p for p in products if get_landCover_term_id(p.get('term', {}))]
@@ -372,13 +395,8 @@ def _run_from_practices(cycle: dict):
         ) for practice in _get_relevant_items(
             cycle=cycle,
             item_name="practices",
-            relevant_terms=[
-                TermTermType.WATERREGIME,
-                TermTermType.TILLAGE,
-                TermTermType.CROPRESIDUEMANAGEMENT,
-                TermTermType.LANDUSEMANAGEMENT,
-                TermTermType.SYSTEM
-            ]
+            term_types=_PRACTICES_TERM_TYPES,
+            completeness_mapping=_PRACTICES_COMPLETENESS_MAPPING
         )
     ]
     practices = list(map(_map_to_value, filter(_should_run_practice, practices)))
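
`_run_from_practices` now honours the Cycle `completeness` flags: a practice term type that appears in the schema's `COMPLETENESS_MAPPING` is only used when its completeness field is `True`, while term types without a mapping are always kept. A hedged sketch of that gating rule with a made-up mapping and completeness block:

    from hestia_earth.schema import TermTermType

    # hypothetical mapping of practice termType -> Cycle completeness field
    completeness_mapping = {'tillage': 'tillage', 'waterRegime': 'water'}
    cycle = {'completeness': {'tillage': True, 'water': False}}

    term_types = [TermTermType.TILLAGE, TermTermType.WATERREGIME, TermTermType.SYSTEM]
    kept = [
        term_type for term_type in term_types
        if not completeness_mapping.get(term_type.value)  # unmapped term types are always kept
        or cycle['completeness'].get(completeness_mapping[term_type.value], False)
    ]
    # kept == [TermTermType.TILLAGE, TermTermType.SYSTEM]
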
@@ -93,41 +93,46 @@ def _is_type(value: dict, ee_type: str):
     ]) if isinstance(params, list) else params.get('ee_type') == ee_type
 
 
-def list_collections(years: list = [], include_region: bool = False, years_only: bool = False):
+def list_rasters(years: list = [], years_only: bool = False):
     ee_params = list_ee_params()
     # only cache `raster` results as can be combined in a single query
     rasters = [value for value in ee_params if _is_type(value, 'raster')]
     rasters = _extend_collections(rasters, years or [])
     rasters = [raster for raster in rasters if not years_only or _is_collection_by_year(raster)]
 
+    return rasters
+
+
+def list_vectors(sites: list):
+    ee_params = list_ee_params()
+
+    vectors = [value for value in ee_params if _is_type(value, 'vector')]
     vectors = [
-        value for value in ee_params if _is_type(value, 'vector') and (
-            include_region or not value.get('params').get('collection', '').startswith('gadm36')
-        )
+        value for value in vectors
+        # name of the model is the key in the data. If the key is present in all sites, we don't need to query
+        if all([not s.get(value.get('name')) for s in sites])
     ]
     # no vectors are running with specific years
-    vectors = [] if years_only else _extend_collections(vectors)
+    vectors = _extend_collections(vectors)
 
-    return (rasters, vectors)
+    return vectors
 
 
 def _cache_results(site: dict, area_size: float):
     # to fetch data related to the year
     years = cached_value(site, key=CACHE_YEARS_KEY, default=[])
-    include_region = all([has_coordinates(site), not site.get('region')])
-    rasters, vectors = list_collections(years, include_region=include_region)
+    rasters = list_rasters(years)
+    vectors = list_vectors([site])
 
     raster_results = _run_query({
         'ee_type': 'raster',
-        'collections': rasters,
-        **geospatial_data(site)
-    })
+        'collections': rasters
+    } | geospatial_data(site)) if rasters else []
 
     vector_results = _run_query({
         'ee_type': 'vector',
-        'collections': vectors,
-        **geospatial_data(site)
-    })
+        'collections': vectors
+    } | geospatial_data(site)) if vectors else []
 
     return cache_site_results(raster_results + vector_results, rasters + vectors, area_size)
 
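
`list_vectors` now receives the sites being cached and skips any vector collection whose result key (the model `name`) is already present on every site, and `_cache_results` skips the raster or vector query entirely when there is nothing left to fetch. A small sketch of the skip rule with invented data:

    # hypothetical vector params: 'name' is the key the cached result is stored under
    vectors = [{'name': 'awareWaterBasinId'}, {'name': 'region'}]
    sites = [{'awareWaterBasinId': '123'}, {'awareWaterBasinId': '456'}]

    to_query = [
        v for v in vectors
        # only query a collection when no site already carries its value
        if all(not site.get(v['name']) for site in sites)
    ]
    # to_query == [{'name': 'region'}]
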
@@ -165,6 +165,12 @@ def last_day_of_month(year: int, month: int):
     )
 
 
+def current_date(): return datetime.datetime.now().date().strftime('%Y-%m-%d')
+
+
+def current_year(): return int(current_date()[:4])
+
+
 def flatten_args(args) -> list:
     """
     Flatten the input args into a single list.
@@ -6,7 +6,8 @@ from hestia_earth.utils.model import find_term_match, linked_node
 from hestia_earth.utils.tools import safe_parse_date, non_empty_list
 
 from hestia_earth.models.log import debugValues, logShouldRun
-from hestia_earth.models.utils.cycle import is_organic
+from . import current_year
+from .cycle import is_organic
 
 MODEL_KEY = 'impactAssessment'
 MATCH_WORLD_QUERY = {'match': {'country.name.keyword': {'query': 'World', 'boost': 1}}}
@@ -14,7 +15,7 @@ MATCH_WORLD_QUERY = {'match': {'country.name.keyword': {'query': 'World', 'boost
 
 def aggregated_end_date(end_date: str):
     year = safe_parse_date(end_date).year
-    return round(math.floor(year / 10) * 10) + 9
+    return min([round(math.floor(year / 10) * 10) + 9, current_year()])
 
 
 def _match_region_country(region: dict, country: dict):
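
`aggregated_end_date` still rounds an end date up to the last year of its decade, but the result is now capped at the current year so aggregated impact data is never looked up for a future date. A simplified worked example (the cap naturally depends on when it is evaluated):

    import math
    from datetime import datetime

    def decade_end(year: int) -> int:
        return min(math.floor(year / 10) * 10 + 9, datetime.now().year)

    decade_end(2014)  # 2019, unchanged by the cap
    decade_end(2023)  # previously 2029; now capped at the current year
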
@@ -36,8 +37,7 @@ def _match_region_country(region: dict, country: dict):
     }
 
 
-def find_closest_impact(cycle: dict, end_date: str, input: dict, region: dict, country: dict, must_queries=[]):
-    term = input.get('term', {})
+def find_closest_impact(cycle: dict, end_date: str, term: dict, region: dict, country: dict, must_queries=[]):
     query = {
         'bool': {
             'must': non_empty_list([
@@ -74,21 +74,24 @@ def find_closest_impact(cycle: dict, end_date: str, input: dict, region: dict, c
 
 def _link_input_to_impact(model: str, cycle: dict, date: int):
     def run(input: dict):
-        term_id = input.get('term', {}).get('@id')
+        term = input.get('term', {})
+        term_id = term.get('@id')
         region = input.get('region')
         country = input.get('country')
-        impact = find_closest_impact(cycle, date, input, region, country)
+        impact = find_closest_impact(cycle, date, term, region, country)
 
+        search_by_region_id = (region or country or {}).get('@id') or 'region-world'
         debugValues(cycle, model=model, term=term_id, key=MODEL_KEY,
-                    input_region=(region or {}).get('@id'),
-                    input_country=(country or {}).get('@id'),
-                    impact=(impact or {}).get('@id'))
+                    search_by_input_term_id=term_id,
+                    search_by_region_id=search_by_region_id,
+                    search_by_end_date=str(date),
+                    impact_assessment_id_found=(impact or {}).get('@id'))
 
         should_run = all([impact is not None])
         logShouldRun(cycle, model, term_id, should_run)
        logShouldRun(cycle, model, term_id, should_run, key=MODEL_KEY) # show specifically under Input
 
-        return {**input, MODEL_KEY: linked_node(impact), 'impactAssessmentIsProxy': True} if impact else None
+        return input | {MODEL_KEY: linked_node(impact), 'impactAssessmentIsProxy': True} if impact else None
     return run
 
 
@@ -527,12 +527,13 @@ def avg_run_in_rowwise(arr: NDArray, n: int):
     return avg_run_in_columnwise(arr.transpose(), n).transpose()
 
 
-def gen_seed(node: dict) -> int:
+def gen_seed(node: dict, *args: tuple[str]) -> int:
     """
-    Generate a seed based on a node's `@id` so that rng is the same each time the model is re-run.
+    Generate a seed based on a node's `@id` and optional args so that rng is the same each time the model is re-run.
     """
     node_id = node.get("@id", "")
-    hashed = hashlib.shake_128(node_id.encode(), usedforsecurity=False).hexdigest(4)
+    seed_str = "".join([node_id] + [str(arg) for arg in args])
+    hashed = hashlib.shake_128(seed_str.encode(), usedforsecurity=False).hexdigest(4)
     return abs(int(hashed, 16))
 
 
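
`gen_seed` can now fold extra arguments into the hash, so models seeded from the same node but with different qualifiers (for example a model name or a year) get distinct yet still reproducible random streams. A standalone sketch mirroring the function body rather than importing it:

    import hashlib

    def gen_seed(node: dict, *args) -> int:
        seed_str = "".join([node.get("@id", "")] + [str(arg) for arg in args])
        hashed = hashlib.shake_128(seed_str.encode(), usedforsecurity=False).hexdigest(4)
        return abs(int(hashed, 16))

    node = {"@id": "site-123"}
    gen_seed(node, "organicCarbonPerHa", 2020) == gen_seed(node, "organicCarbonPerHa", 2020)  # True: reproducible
    gen_seed(node, 2020) != gen_seed(node, 2021)  # distinct seed strings give distinct seeds (barring a hash collision)
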
@@ -35,7 +35,7 @@ from .lookup import (
     is_product_id_allowed, is_product_termType_allowed,
     is_input_id_allowed, is_input_termType_allowed, _node_value
 )
-from .property import get_node_property, get_node_property_value, find_term_property
+from .property import get_node_property, get_node_property_value
 from .term import get_lookup_value
 from ..log import debugValues, log_as_table
 
@@ -270,7 +270,8 @@ def get_total_value_converted_with_min_ratio(
     model: str, term: str, node: dict = {},
     blank_nodes: list = [],
     prop_id: str = 'energyContentHigherHeatingValue',
-    min_ratio: float = 0.8
+    min_ratio: float = 0.8,
+    is_sum: bool = True
 ):
     values = [
         (
@@ -301,9 +302,14 @@
     debugValues(node, model=model, term=term,
                 **logs)
 
-    return list_sum([
+    total_converted_value = list_sum([
         value * prop_value for term_id, value, prop_value in values if all([value, prop_value])
-    ]) * total_value / total_value_with_property if total_value_ratio >= min_ratio else None
+    ])
+
+    return (
+        total_converted_value * total_value / total_value_with_property if is_sum
+        else total_converted_value / total_value_with_property
+    ) if total_value_ratio >= min_ratio else None
 
 
 def get_N_total(nodes: list) -> list:
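
With the default `is_sum=True` the helper behaves as before and scales the property-converted total back up to the full `total_value`; with `is_sum=False` it instead returns an average-style figure by dividing only by the value that actually carried the property. A simplified numeric sketch with invented numbers, ignoring the `min_ratio` guard:

    values = [(10, 2.0), (30, 4.0)]   # (value, property value) pairs that have the property
    total_value = 50                  # includes 10 units with no property data
    total_value_with_property = 40

    total_converted_value = sum(value * prop for value, prop in values)   # 10*2 + 30*4 = 140

    as_sum = total_converted_value * total_value / total_value_with_property   # 175.0
    as_average = total_converted_value / total_value_with_property             # 3.5
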
@@ -1462,16 +1468,20 @@ def _convert_via_property(node: dict, node_value: Union[int, float], property_fi
 
     Parameters
     ----------
-    node: a dict containing a term
-    node_value: value to be converted as float or int
-    property_field: str such as "density"
+    node: dict
+        Blank node containing a term
+    node_value: int | float
+        Value to be converted as float or int
+    property_field: str
+        E.g., "density"
 
-    Returns float or None
+    Returns
     -------
+    Float or None
     """
-    node_property = find_term_property(node, property_field, default={}, keep_in_memory=True)
-    node_property_value = safe_parse_float(node_property.get("value", 0))
-
+    node_property_value = get_node_property_value(
+        model=None, node=node, prop_id=property_field, default=0, handle_percents=False
+    )
     return node_value * node_property_value if node_value is not None and bool(node_property_value) else None
 
 
@@ -1480,7 +1490,7 @@ def convert_unit(node, dest_unit: Units, node_value: Union[int, float] = None) -
     Convert a number `value` inside a node or a optional `node_value` belonging to a term `node`, to unit `dest_unit`
     using the ATOMIC_WEIGHT_CONVERSIONS map or failing that, the PROPERTY_UNITS_CONVERSIONS map and lookups
     """
-    src_unit = node.get("units", "")
+    src_unit = node.get("units") or node.get('term', {}).get('units', "")
 
     node_value = _node_value(node) if node_value is None else node_value
 
@@ -1499,7 +1509,7 @@ def convert_unit_properties(node_value: Union[int, float], node: dict, dest_unit
     Uses cached calls to download_hestia() internally for speedup
     Returns None if no conversion possible.
     """
-    src_unit = node.get('units', '')
+    src_unit = node.get("units") or node.get('term', {}).get('units', "")
     conversions = PROPERTY_UNITS_CONVERSIONS.get(src_unit, {}).get(dest_unit.value, [])
     return reduce(
         lambda value, conversion_property_field: _convert_via_property(node, value, conversion_property_field),
@@ -49,22 +49,24 @@ def all_factor_value(
 ):
     values = list(map(_factor_value(model, term_id, lookup_name, lookup_col, grouped_key), blank_nodes))
 
+    has_values = len(values) > 0
     missing_values = set([v.get('id') for v in values if v.get('value') is not None and v.get('coefficient') is None])
     all_with_factors = all([v.get('coefficient') is not None for v in values if v.get('value') is not None])
 
     for missing_value in missing_values:
-        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None)
+        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None, model=model, term=term_id)
 
     debugValues(node, model=model, term=term_id,
                 all_with_factors=all_with_factors,
                 missing_lookup_factor=';'.join(missing_values),
+                has_values=has_values,
                 values_used=log_as_table(values))
 
     values = [float((v.get('value') or 0) * (v.get('coefficient') or 0)) for v in values]
 
     # fail if some factors are missing
     return None if not all_with_factors else (
-        list_sum(values) if len(values) > 0 else default_no_values
+        list_sum(values) if has_values else default_no_values
     )
 
 
@@ -57,7 +57,8 @@ def find_term_property(term, property: str, default=None, keep_in_memory=False)
     return find_term_match(props, property, default)
 
 
-def get_node_property(node: dict, property: str, find_default_property: bool = True):
+def get_node_property(node: dict, property: str, find_default_property: bool = True,
+                      keep_in_memory: bool = False) -> dict:
     """
     Get the property by `@id` linked to the Blank Node in the glossary.
 
@@ -73,6 +74,8 @@ def get_node_property(node: dict, property: str, find_default_property: bool = T
         The `term.@id` of the property. Example: `nitrogenContent`.
     find_default_property : bool
         Default to fetching the property from the `defaultProperties` of the `Term`.
+    keep_in_memory:
+        If True and find_default_property is True, will cache this term_id call to api
 
     Returns
     -------
@@ -80,7 +83,7 @@ def get_node_property(node: dict, property: str, find_default_property: bool = T
         The property if found, `None` otherwise.
     """
     prop = find_term_match(node.get('properties', []), property, None)
-    return find_term_property(node.get('term', {}), property, {}) if all([
+    return find_term_property(node.get('term', {}), property, {}, keep_in_memory) if all([
         find_default_property,
         prop is None
     ]) else (prop or {})
@@ -1 +1 @@
-VERSION = '0.65.11'
+VERSION = '0.67.0'
@@ -1,5 +1,7 @@
 import os
 import sys
+import platform
+import resource
 import logging
 
 LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
@@ -42,6 +44,15 @@ if LOG_FILENAME is not None:
 def _join_args(**kwargs): return ', '.join([f"{key}={value}" for key, value in kwargs.items()])
 
 
+def log_memory_usage(**kwargs):
+    factor = 1024 * (
+        1024 if platform.system() in ['Darwin', 'Windows'] else 1
+    )
+    value = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / factor
+    extra = (', ' + _join_args(**kwargs)) if len(kwargs.keys()) > 0 else ''
+    logger.info('memory used=%s, unit=MB' + extra, value)
+
+
 def _log_node_suffix(node: dict = {}):
     node_type = node.get('@type', node.get('type')) if node else None
     node_id = node.get('@id', node.get('id', node.get('term', {}).get('@id'))) if node else None
@@ -7,7 +7,7 @@ from copy import deepcopy
 from hestia_earth.utils.tools import non_empty_list
 
 from hestia_earth.models.version import VERSION
-from ..log import logger
+from ..log import logger, log_memory_usage
 from ..utils import get_required_model_param, _snakecase
 from ..strategies.run import should_run
 from ..strategies.merge import merge
@@ -76,10 +76,15 @@ def _run_post_checks(data: dict):
 
 
 def _run_model(data: dict, model: dict, all_models: list):
-    module = _import_model(get_required_model_param(model, 'model'))
-    # if no value is provided, use all the models but this one
+    model_id = get_required_model_param(model, 'model')
     model_value = model.get('value') or _list_except_item(all_models, model)
+    log_memory_usage(model_model=model_id, model_value=model_value, step='before')
+
+    module = _import_model(model_id.replace('-', '_'))
+    # if no value is provided, use all the models but this one
     result = module.get('run')(model_value, data)
+
+    log_memory_usage(model_model=model_id, model_value=model_value, step='after')
     return {'data': data, 'model': model, 'version': module.get('version'), 'result': result}
 
 
@@ -1,5 +1,7 @@
 import pydash
+from datetime import datetime
 from hestia_earth.schema import UNIQUENESS_FIELDS
+from hestia_earth.utils.tools import safe_parse_date
 
 from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
 from .merge_node import merge as merge_node
@@ -31,11 +33,17 @@ def _match_list_el(source: list, dest: list, key: str):
     return src_value == dest_value
 
 
-def _match_el(source: dict, dest: dict, keys: list):
+def _get_value(data: dict, key: str, merge_args: dict = {}):
+    value = pydash.objects.get(data, key)
+    date = safe_parse_date(value) if key in ['startDate', 'endDate'] else None
+    return datetime.strftime(date, merge_args.get('matchDatesFormat', '%Y-%m-%d')) if date else value
+
+
+def _match_el(source: dict, dest: dict, keys: list, merge_args: dict = {}):
     def match(key: str):
         keys = key.split('.')
-        src_value = pydash.objects.get(source, key)
-        dest_value = pydash.objects.get(dest, key)
+        src_value = _get_value(source, key, merge_args)
+        dest_value = _get_value(dest, key, merge_args)
         is_list = len(keys) >= 2 and (
             isinstance(pydash.objects.get(source, keys[0]), list) or
             isinstance(pydash.objects.get(dest, keys[0]), list)
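
When matching list elements on `startDate` or `endDate`, both sides are now parsed and re-formatted with the merge configuration's `matchDatesFormat` (default `%Y-%m-%d`), so for example "2020-01-01" and "2020" can be treated as the same node when the orchestrator is configured to match on year only. A small sketch of the normalisation, assuming `safe_parse_date` accepts both date forms:

    from datetime import datetime
    from hestia_earth.utils.tools import safe_parse_date

    def normalise(value: str, match_format: str = '%Y-%m-%d') -> str:
        date = safe_parse_date(value)
        return datetime.strftime(date, match_format) if date else value

    normalise('2020-01-01', '%Y') == normalise('2020', '%Y')  # both normalise to '2020'
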
@@ -68,7 +76,7 @@ def _handle_local_property(values: list, properties: list, local_id: str):
     return properties
 
 
-def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str):
+def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str, merge_args: dict):
     """
     Find an element in the values that match the new element, based on the unique properties.
     To find a matching element:
@@ -83,7 +91,10 @@
     ]
     properties = _handle_local_property(values, properties, 'impactAssessment.id')
 
-    return next((i for i in range(len(values)) if _match_el(values[i], el, properties)), None) if properties else None
+    return next(
+        (i for i in range(len(values)) if _match_el(values[i], el, properties, merge_args)),
+        None
+    ) if properties else None
 
 
 def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_args: dict = {}, node_type: str = ''):
@@ -95,7 +106,7 @@ def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_
     skip_same_term = merge_args.get('skipSameTerm', False)
 
     for el in _non_empty_list(merge_with):
-        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type)
+        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type, merge_args)
         if source_index is None:
             source.append(update_node_version(version, el))
         elif not skip_same_term:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.65.11
+Version: 0.67.0
 Summary: HESTIA's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: HESTIA Team