PyPI - hestia-earth-models - Versions diffs - 0.65.11__py3-none-any.whl → 0.66.0__py3-none-any.whl - Mend

hestia-earth-models 0.65.11__py3-none-any.whl → 0.66.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

hestia_earth/models/mocking/search-results.json CHANGED Viewed

@@ -1768,7 +1768,7 @@
         "@type": "Term",
         "name": "Generic crop, seed",
         "@id": "genericCropSeed",
-        "_score": 25.442797
+        "_score": 25.417622
       }
     ]
   },
@@ -2004,157 +2004,157 @@
         "@type": "Term",
         "name": "Glass or high accessible cover",
         "@id": "glassOrHighAccessibleCover",
-        "_score": 64.880936
+        "_score": 64.78735
       },
       {
         "@type": "Term",
         "name": "Sea or ocean",
         "@id": "seaOrOcean",
-        "_score": 54.485077
+        "_score": 54.49869
       },
       {
         "@type": "Term",
         "name": "River or stream",
         "@id": "riverOrStream",
-        "_score": 52.44936
+        "_score": 52.33286
       },
       {
         "@type": "Term",
         "name": "Other natural vegetation",
         "@id": "otherNaturalVegetation",
-        "_score": 43.053535
+        "_score": 43.071865
       },
       {
         "@type": "Term",
         "name": "Agri-food processor",
         "@id": "agriFoodProcessor",
-        "_score": 42.061752
+        "_score": 41.75925
       },
       {
         "@type": "Term",
         "name": "Food retailer",
         "@id": "foodRetailer",
-        "_score": 41.523476
+        "_score": 41.196114
       },
       {
         "@type": "Term",
         "name": "Natural forest",
         "@id": "naturalForest",
-        "_score": 32.727547
+        "_score": 32.262592
       },
       {
         "@type": "Term",
         "name": "Permanent pasture",
         "@id": "permanentPasture",
-        "_score": 28.696854
+        "_score": 28.761673
       },
       {
         "@type": "Term",
         "name": "Animal housing",
         "@id": "animalHousing",
-        "_score": 27.970219
+        "_score": 27.809086
       },
       {
         "@type": "Term",
         "name": "Root or tuber crop plant",
         "@id": "rootOrTuberCropPlant",
-        "_score": 27.436356
+        "_score": 27.444334
       },
       {
         "@type": "Term",
         "name": "High intensity grazing pasture",
         "@id": "highIntensityGrazingPasture",
-        "_score": 24.657646
+        "_score": 24.399143
       },
       {
         "@type": "Term",
         "name": "Forest",
         "@id": "forest",
-        "_score": 20.619322
+        "_score": 20.347794
       },
       {
         "@type": "Term",
         "name": "Permanent cropland",
         "@id": "permanentCropland",
-        "_score": 20.092436
+        "_score": 20.196049
       },
       {
         "@type": "Term",
         "name": "Other land",
         "@id": "otherLand",
-        "_score": 19.758368
+        "_score": 19.765242
       },
       {
         "@type": "Term",
         "name": "Plantation forest",
         "@id": "plantationForest",
-        "_score": 19.308796
-      },
-      {
-        "@type": "Term",
-        "name": "Sea kale plant",
-        "@id": "seaKalePlant",
-        "_score": 18.2738
+        "_score": 19.03003
       },
       {
         "@type": "Term",
         "name": "Lake",
         "@id": "lake",
-        "_score": 18.255703
+        "_score": 18.264688
       },
       {
         "@type": "Term",
-        "name": "Red sea plume alga",
-        "@id": "redSeaPlumeAlga",
-        "_score": 18.100435
+        "name": "Sea kale plant",
+        "@id": "seaKalePlant",
+        "_score": 18.135399
       },
       {
         "@type": "Term",
         "name": "Native pasture",
         "@id": "nativePasture",
-        "_score": 17.83277
+        "_score": 17.863037
       },
       {
         "@type": "Term",
         "name": "Improved pasture",
         "@id": "improvedPasture",
-        "_score": 17.396095
+        "_score": 17.444498
+      },
+      {
+        "@type": "Term",
+        "name": "Red sea plume alga",
+        "@id": "redSeaPlumeAlga",
+        "_score": 17.235117
       },
       {
         "@type": "Term",
         "name": "Nominally managed pasture",
         "@id": "nominallyManagedPasture",
-        "_score": 16.762512
+        "_score": 16.79026
       },
       {
         "@type": "Term",
         "name": "Severely degraded pasture",
         "@id": "severelyDegradedPasture",
-        "_score": 16.237305
+        "_score": 16.32682
       },
       {
         "@type": "Term",
         "name": "Pond",
         "@id": "pond",
-        "_score": 15.637012
+        "_score": 15.64036
       },
       {
         "@type": "Term",
         "name": "River tamarind tree",
         "@id": "riverTamarindTree",
-        "_score": 15.435883
+        "_score": 15.445333
       },
       {
         "@type": "Term",
         "name": "Annual cropland",
         "@id": "annualCropland",
-        "_score": 9.802788
+        "_score": 9.808704
       },
       {
         "@type": "Term",
         "name": "Cropland",
         "@id": "cropland",
-        "_score": 9.768499
+        "_score": 9.772207
       }
     ]
   },

hestia_earth/models/site/defaultMethodClassification.py CHANGED Viewed

@@ -5,6 +5,8 @@ When gap-filling `management` node on Site, the
 `defaultMethodClassification` and `defaultMethodClassificationDescription` fields become required.
 This model will use the first value in the `management` node.
 """
+from hestia_earth.schema import SiteDefaultMethodClassification
 from hestia_earth.models.log import logRequirements, logShouldRun
 from . import MODEL
@@ -20,12 +22,17 @@ MODEL_KEY = 'defaultMethodClassification'
 def _should_run(site: dict):
-    methodClassification = next((n.get('methodClassification') for n in site.get('management', [])), None)
+    has_management = bool(site.get('management', []))
+    methodClassification = next(
+        (n.get('methodClassification') for n in site.get('management', [])),
+        None
+    ) or SiteDefaultMethodClassification.MODELLED.value
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassification=methodClassification)
-    should_run = all([methodClassification])
+    should_run = all([has_management, methodClassification])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassification

hestia_earth/models/site/defaultMethodClassificationDescription.py CHANGED Viewed

@@ -20,16 +20,18 @@ MODEL_KEY = 'defaultMethodClassificationDescription'
 def _should_run(site: dict):
+    has_management = bool(site.get('management', []))
     methodClassificationDescription = next((
         n.get('methodClassificationDescription')
         for n in site.get('management', [])
         if n.get('methodClassification')
-    ), None)
+    ), None) or 'Data calculated by merging real land use histories and modelled land use histories for each Site.'
     logRequirements(site, model=MODEL, model_key=MODEL_KEY,
+                    has_management=has_management,
                     methodClassificationDescription=methodClassificationDescription)
-    should_run = all([methodClassificationDescription])
+    should_run = all([has_management, methodClassificationDescription])
     logShouldRun(site, MODEL, None, should_run, model_key=MODEL_KEY)
     return should_run, methodClassificationDescription

hestia_earth/models/site/management.py CHANGED Viewed

@@ -10,7 +10,8 @@ tillage, cropResidueManagement and landUseManagement.
 All values are copied from the source node, except for crop and forage terms in which case the dates are copied from the
 cycle.
-Where `startDate` is missing from landCover products, gap-filling is attempted using `endDate` - `maximumCycleDuration`.
+Where `startDate` is missing from landCover products, gap-filling is attempted using
+`endDate` - `cycleDuration` (or `maximumCycleDuration` lookup).
 This is the `endDate` of the `landCover` product.
 This ensures no overlapping date ranges.
 If both `endDate` and `startDate` are missing from the product, these will be gap-filled from the `Cycle`.
@@ -18,9 +19,10 @@ If both `endDate` and `startDate` are missing from the product, these will be ga
 When nodes are chronologically consecutive with "% area" or "boolean" units and the same term and value, they are
 condensed into a single node to aid readability.
 """
+from typing import List
 from datetime import timedelta, datetime
 from functools import reduce
-from hestia_earth.schema import TermTermType, SiteSiteType
+from hestia_earth.schema import SchemaType, TermTermType, SiteSiteType, COMPLETENESS_MAPPING
 from hestia_earth.utils.lookup import column_name, get_table_value, download_lookup
 from hestia_earth.utils.model import filter_list_term_type
 from hestia_earth.utils.tools import safe_parse_float, flatten
@@ -42,7 +44,6 @@ REQUIREMENTS = {
         "related": {
             "Cycle": [{
                 "@type": "Cycle",
-                "startDate": "",
                 "endDate": "",
                 "products": [
                     {
@@ -71,7 +72,11 @@ REQUIREMENTS = {
                             "soilAmendment"
                         ]
                     }
-                ]
+                ],
+                "optional": {
+                    "startDate": "",
+                    "cycleDuration": ""
+                }
             }]
         }
     }
@@ -98,6 +103,14 @@ LOOKUPS = {
 }
 MODEL_KEY = 'management'
+_PRACTICES_TERM_TYPES = [
+    TermTermType.WATERREGIME,
+    TermTermType.TILLAGE,
+    TermTermType.CROPRESIDUEMANAGEMENT,
+    TermTermType.LANDUSEMANAGEMENT,
+    TermTermType.SYSTEM
+]
+_PRACTICES_COMPLETENESS_MAPPING = COMPLETENESS_MAPPING.get(SchemaType.PRACTICE.value)
 _ANIMAL_MANURE_USED_TERM_ID = "animalManureUsed"
 _INORGANIC_NITROGEN_FERTILISER_USED_TERM_ID = "inorganicNitrogenFertiliserUsed"
 _ORGANIC_FERTILISER_USED_TERM_ID = "organicFertiliserUsed"
@@ -147,11 +160,13 @@ def management(data: dict):
     return node
-def _get_maximum_cycle_duration(land_cover_id: str):
-    lookup = download_lookup("crop.csv")
-    return safe_parse_float(
-        get_table_value(lookup, column_name('landCoverTermId'), land_cover_id, column_name('maximumCycleDuration'))
-    )
+def _get_cycle_duration(cycle: dict, land_cover_id: str):
+    return cycle.get('cycleDuration') or safe_parse_float(get_table_value(
+        download_lookup("crop.csv"),
+        column_name('landCoverTermId'),
+        land_cover_id,
+        column_name('maximumCycleDuration')
+    ))
 def _gap_filled_date_only_str(date_str: str, mode: str = DatestrGapfillMode.END) -> str:
@@ -166,16 +181,16 @@ def _gap_filled_date_obj(date_str: str, mode: str = DatestrGapfillMode.END) -> d
 def _gap_filled_start_date(land_cover_id: str, end_date: str, cycle: dict) -> dict:
-    """If possible, gap-fill the startDate based on the endDate - maximumCycleDuration"""
-    maximum_cycle_duration = _get_maximum_cycle_duration(land_cover_id)
+    """If possible, gap-fill the startDate based on the endDate - cycleDuration"""
+    cycle_duration = _get_cycle_duration(cycle, land_cover_id)
     return {
         "startDate": max(
-            _gap_filled_date_obj(end_date) - timedelta(days=maximum_cycle_duration)
-            if maximum_cycle_duration else datetime.fromtimestamp(0),
+            _gap_filled_date_obj(end_date) - timedelta(days=cycle_duration)
+            if cycle_duration else datetime.fromtimestamp(0),
             _gap_filled_date_obj(cycle.get("startDate"), mode=DatestrGapfillMode.START)
             if cycle.get("startDate") else datetime.fromtimestamp(0)
         )
-    } if any([maximum_cycle_duration, cycle.get("startDate")]) else {}
+    } if any([cycle_duration, cycle.get("startDate")]) else {}
 def _include_with_date_gap_fill(value: dict, keys: list) -> dict:
@@ -217,12 +232,21 @@ def _copy_item_if_exists(source: dict, keys: list[str] = None, dest: dict = None
     return reduce(lambda p, c: p | ({c: source[c]} if source.get(c) else {}), keys or [], dest or {})
-def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
+def _get_relevant_items(cycle: dict, item_name: str, term_types: List[TermTermType], completeness_mapping: dict = {}):
     """
     Get items from the list of cycles with any of the relevant terms.
     Also adds dates from Cycle.
     """
-    items = [
+    # filter term types that are not complete
+    complete_term_types = term_types if not completeness_mapping else [
+        term_type for term_type in term_types
+        if any([
+            not completeness_mapping.get(term_type.value),
+            cycle.get('completeness').get(completeness_mapping.get(term_type.value), False)
+        ])
+    ]
+    blank_nodes = filter_list_term_type(cycle.get(item_name, []), complete_term_types)
+    return [
         _include_with_date_gap_fill(cycle, ["startDate", "endDate"]) |
         _include(
             _gap_filled_start_date(
@@ -233,20 +257,19 @@ def _get_relevant_items(cycle: dict, item_name: str, relevant_terms: list):
             "startDate"
         ) |
         item
-        for item in filter_list_term_type(cycle.get(item_name, []), relevant_terms)
+        for item in blank_nodes
     ]
-    return items
 def _process_rule(node: dict, term: dict) -> list:
-    relevant_terms = []
+    term_types = []
     for column, condition, new_term in _INPUT_RULES[term.get('termType')]:
         lookup_result = get_lookup_value(term, LOOKUPS[column], model=MODEL, term=term.get('@id'), model_key=MODEL_KEY)
         if condition(lookup_result):
-            relevant_terms.append(node | {'id': new_term})
+            term_types.append(node | {'id': new_term})
-    return relevant_terms
+    return term_types
 def _run_from_inputs(site: dict, cycle: dict) -> list:
@@ -307,7 +330,7 @@ def _run_from_landCover(cycle: dict, crop_forage_products: list):
         )) for product in _get_relevant_items(
             cycle=cycle,
             item_name="products",
-            relevant_terms=[TermTermType.LANDCOVER]
+            term_types=[TermTermType.LANDCOVER]
         )
     ]
     return land_cover_products + _run_products(
@@ -337,7 +360,7 @@ def _run_from_crop_forage(cycle: dict, site: dict):
     products = _get_relevant_items(
         cycle=cycle,
         item_name="products",
-        relevant_terms=[TermTermType.CROP, TermTermType.FORAGE]
+        term_types=[TermTermType.CROP, TermTermType.FORAGE]
     ) if site.get("siteType", "") == SiteSiteType.CROPLAND.value else []
     # only take products with a matching landCover term
     products = [p for p in products if get_landCover_term_id(p.get('term', {}))]
@@ -372,13 +395,8 @@ def _run_from_practices(cycle: dict):
         ) for practice in _get_relevant_items(
             cycle=cycle,
             item_name="practices",
-            relevant_terms=[
-                TermTermType.WATERREGIME,
-                TermTermType.TILLAGE,
-                TermTermType.CROPRESIDUEMANAGEMENT,
-                TermTermType.LANDUSEMANAGEMENT,
-                TermTermType.SYSTEM
-            ]
+            term_types=_PRACTICES_TERM_TYPES,
+            completeness_mapping=_PRACTICES_COMPLETENESS_MAPPING
         )
     ]
     practices = list(map(_map_to_value, filter(_should_run_practice, practices)))

hestia_earth/models/site/pre_checks/cache_geospatialDatabase.py CHANGED Viewed

@@ -93,41 +93,46 @@ def _is_type(value: dict, ee_type: str):
     ]) if isinstance(params, list) else params.get('ee_type') == ee_type
-def list_collections(years: list = [], include_region: bool = False, years_only: bool = False):
+def list_rasters(years: list = [], years_only: bool = False):
     ee_params = list_ee_params()
     # only cache `raster` results as can be combined in a single query
     rasters = [value for value in ee_params if _is_type(value, 'raster')]
     rasters = _extend_collections(rasters, years or [])
     rasters = [raster for raster in rasters if not years_only or _is_collection_by_year(raster)]
+    return rasters
+def list_vectors(sites: list):
+    ee_params = list_ee_params()
+    vectors = [value for value in ee_params if _is_type(value, 'vector')]
     vectors = [
-        value for value in ee_params if _is_type(value, 'vector') and (
-            include_region or not value.get('params').get('collection', '').startswith('gadm36')
-        )
+        value for value in vectors
+        # name of the model is the key in the data. If the key is present in all sites, we don't need to query
+        if all([not s.get(value.get('name')) for s in sites])
     ]
     # no vectors are running with specific years
-    vectors = [] if years_only else _extend_collections(vectors)
+    vectors = _extend_collections(vectors)
-    return (rasters, vectors)
+    return vectors
 def _cache_results(site: dict, area_size: float):
     # to fetch data related to the year
     years = cached_value(site, key=CACHE_YEARS_KEY, default=[])
-    include_region = all([has_coordinates(site), not site.get('region')])
-    rasters, vectors = list_collections(years, include_region=include_region)
+    rasters = list_rasters(years)
+    vectors = list_vectors([site])
     raster_results = _run_query({
         'ee_type': 'raster',
-        'collections': rasters,
-        **geospatial_data(site)
-    })
+        'collections': rasters
+    } | geospatial_data(site)) if rasters else []
     vector_results = _run_query({
         'ee_type': 'vector',
-        'collections': vectors,
-        **geospatial_data(site)
-    })
+        'collections': vectors
+    } | geospatial_data(site)) if vectors else []
     return cache_site_results(raster_results + vector_results, rasters + vectors, area_size)

hestia_earth/models/utils/blank_node.py CHANGED Viewed

@@ -270,7 +270,8 @@ def get_total_value_converted_with_min_ratio(
     model: str, term: str, node: dict = {},
     blank_nodes: list = [],
     prop_id: str = 'energyContentHigherHeatingValue',
-    min_ratio: float = 0.8
+    min_ratio: float = 0.8,
+    is_sum: bool = True
 ):
     values = [
         (
@@ -301,9 +302,14 @@ def get_total_value_converted_with_min_ratio(
     debugValues(node, model=model, term=term,
                 **logs)
-    return list_sum([
+    total_converted_value = list_sum([
         value * prop_value for term_id, value, prop_value in values if all([value, prop_value])
-    ]) * total_value / total_value_with_property if total_value_ratio >= min_ratio else None
+    ])
+    return (
+        total_converted_value * total_value / total_value_with_property if is_sum
+        else total_converted_value / total_value_with_property
+    ) if total_value_ratio >= min_ratio else None
 def get_N_total(nodes: list) -> list:

hestia_earth/models/utils/lookup.py CHANGED Viewed

@@ -53,7 +53,7 @@ def all_factor_value(
     all_with_factors = all([v.get('coefficient') is not None for v in values if v.get('value') is not None])
     for missing_value in missing_values:
-        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None)
+        debugMissingLookup(lookup_name, 'termid', missing_value, lookup_col, None, model=model, term=term_id)
     debugValues(node, model=model, term=term_id,
                 all_with_factors=all_with_factors,

hestia_earth/models/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.65.11'
1	+ VERSION = '0.66.0'

hestia_earth/orchestrator/strategies/merge/merge_list.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import pydash
+from datetime import datetime
 from hestia_earth.schema import UNIQUENESS_FIELDS
+from hestia_earth.utils.tools import safe_parse_date
 from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
 from .merge_node import merge as merge_node
@@ -31,11 +33,17 @@ def _match_list_el(source: list, dest: list, key: str):
     return src_value == dest_value
-def _match_el(source: dict, dest: dict, keys: list):
+def _get_value(data: dict, key: str, merge_args: dict = {}):
+    value = pydash.objects.get(data, key)
+    date = safe_parse_date(value) if key in ['startDate', 'endDate'] else None
+    return datetime.strftime(date, merge_args.get('matchDatesFormat', '%Y-%m-%d')) if date else value
+def _match_el(source: dict, dest: dict, keys: list, merge_args: dict = {}):
     def match(key: str):
         keys = key.split('.')
-        src_value = pydash.objects.get(source, key)
-        dest_value = pydash.objects.get(dest, key)
+        src_value = _get_value(source, key, merge_args)
+        dest_value = _get_value(dest, key, merge_args)
         is_list = len(keys) >= 2 and (
             isinstance(pydash.objects.get(source, keys[0]), list) or
             isinstance(pydash.objects.get(dest, keys[0]), list)
@@ -68,7 +76,7 @@ def _handle_local_property(values: list, properties: list, local_id: str):
     return properties
-def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str):
+def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str, merge_args: dict):
     """
     Find an element in the values that match the new element, based on the unique properties.
     To find a matching element:
@@ -83,7 +91,10 @@ def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model:
     ]
     properties = _handle_local_property(values, properties, 'impactAssessment.id')
-    return next((i for i in range(len(values)) if _match_el(values[i], el, properties)), None) if properties else None
+    return next(
+        (i for i in range(len(values)) if _match_el(values[i], el, properties, merge_args)),
+        None
+    ) if properties else None
 def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_args: dict = {}, node_type: str = ''):
@@ -95,7 +106,7 @@ def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_
     skip_same_term = merge_args.get('skipSameTerm', False)
     for el in _non_empty_list(merge_with):
-        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type)
+        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type, merge_args)
         if source_index is None:
             source.append(update_node_version(version, el))
         elif not skip_same_term:

{hestia_earth_models-0.65.11.dist-info → hestia_earth_models-0.66.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hestia-earth-models
-Version: 0.65.11
+Version: 0.66.0
 Summary: HESTIA's set of modules for filling gaps in the activity data using external datasets (e.g. populating soil properties with a geospatial dataset using provided coordinates) and internal lookups (e.g. populating machinery use from fuel use). Includes rules for when gaps should be filled versus not (e.g. never gap fill yield, gap fill crop residue if yield provided etc.).
 Home-page: https://gitlab.com/hestia-earth/hestia-engine-models
 Author: HESTIA Team

hestia-earth-models 0.65.11__py3-none-any.whl → 0.66.0__py3-none-any.whl

hestia-earth-models 0.65.11__py3-none-any.whl → 0.66.0__py3-none-any.whl