hestia-earth-models 0.70.6__py3-none-any.whl → 0.72.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. hestia_earth/models/cache_nodes.py +157 -0
  2. hestia_earth/models/cache_sites.py +1 -1
  3. hestia_earth/models/config/Cycle.json +0 -30
  4. hestia_earth/models/config/Site.json +8 -0
  5. hestia_earth/models/data/ecoinventV3/__init__.py +7 -5
  6. hestia_earth/models/ecoinventV3/__init__.py +8 -1
  7. hestia_earth/models/geospatialDatabase/histosol.py +14 -7
  8. hestia_earth/models/hestia/aboveGroundCropResidue.py +3 -3
  9. hestia_earth/models/hestia/histosol.py +53 -0
  10. hestia_earth/models/hestia/seed_emissions.py +25 -21
  11. hestia_earth/models/ipcc2019/organicCarbonPerHa_tier_1.py +22 -7
  12. hestia_earth/models/mocking/search-results.json +1172 -1168
  13. hestia_earth/models/utils/aggregated.py +3 -3
  14. hestia_earth/models/utils/background_emissions.py +24 -0
  15. hestia_earth/models/utils/measurement.py +16 -3
  16. hestia_earth/models/utils/pesticideAI.py +1 -1
  17. hestia_earth/models/version.py +1 -1
  18. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/METADATA +3 -3
  19. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/RECORD +26 -26
  20. tests/models/geospatialDatabase/test_histosol.py +21 -20
  21. tests/models/hestia/test_histosol.py +24 -0
  22. tests/models/ipcc2019/test_organicCarbonPerHa_tier_1.py +4 -3
  23. tests/models/test_cache_nodes.py +31 -0
  24. hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +0 -100
  25. hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +0 -99
  26. tests/models/ipcc2006/test_co2ToAirOrganicSoilCultivation.py +0 -49
  27. tests/models/ipcc2006/test_n2OToAirOrganicSoilCultivationDirect.py +0 -32
  28. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/LICENSE +0 -0
  29. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/WHEEL +0 -0
  30. {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.72.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,157 @@
1
+ import os
2
+ from functools import reduce
3
+ from hestia_earth.schema import NodeType
4
+ from hestia_earth.utils.tools import current_time_ms, flatten
5
+ from hestia_earth.earth_engine import init_gee
6
+
7
+ from .log import logger
8
+ from .utils import CACHE_KEY
9
+ from .utils.site import years_from_cycles
10
+ from .utils.source import CACHE_SOURCES_KEY, find_sources
11
+ from .cache_sites import run as cache_sites
12
+
13
+ CACHE_RELATED_KEY = 'related'
14
+ CACHE_NESTED_KEY = 'nested'
15
+
16
+ _CACHE_BATCH_SIZE = int(os.getenv('CACHE_SITES_BATCH_SIZE', '5000'))
17
+ _ENABLE_CACHE_YEARS = os.getenv('ENABLE_CACHE_YEARS', 'true') == 'true'
18
+ _ENABLE_CACHE_RELATED_NODES = os.getenv('ENABLE_CACHE_RELATED_NODES', 'true') == 'true'
19
+ _CACHE_NODE_TYPES = [
20
+ NodeType.SITE.value,
21
+ NodeType.CYCLE.value,
22
+ NodeType.IMPACTASSESSMENT.value
23
+ ]
24
+
25
+
26
+ def _pop_items(values: list, nb_items: int):
27
+ if len(values) < nb_items:
28
+ removed_items = values[:] # Get a copy of the entire array
29
+ values.clear() # Remove all items from the original array
30
+ else:
31
+ removed_items = values[:nb_items] # Get the first N items
32
+ del values[:nb_items] # Remove the first N items from the original array
33
+
34
+ return removed_items
35
+
36
+
37
+ def _filter_by_type(nodes: list, type: str): return [n for n in nodes if n.get('@type', n.get('type')) == type]
38
+
39
+
40
+ def _node_key(node: dict): return '/'.join([node.get('type', node.get('@type')), node.get('id', node.get('@id'))])
41
+
42
+
43
def _years_from_cycles(nodes: list):
    """Collect the years covered by the Cycle nodes among *nodes*.

    Note: the parameter was previously annotated as ``dict`` but a list of
    nodes is what both callers pass and what ``_filter_by_type`` expects.
    """
    return years_from_cycles(_filter_by_type(nodes, NodeType.CYCLE.value))
44
+
45
+
46
+ def _linked_node(data: dict): return {'type': data.get('type'), 'id': data.get('id')}
47
+
48
+
49
def _find_nested_nodes(data) -> list[dict]:
    """Recursively collect {'type', 'id'} references to cacheable nodes inside *data*.

    A dict that is itself a cacheable node (known type + non-empty id) yields a
    single reference and is not traversed further; other containers are walked.
    """
    if isinstance(data, list):
        return flatten(map(_find_nested_nodes, data))
    if not isinstance(data, dict):
        return []
    is_cacheable = data.get('type') in _CACHE_NODE_TYPES and bool(data.get('id'))
    return [_linked_node(data)] if is_cacheable else flatten(_find_nested_nodes(list(data.values())))
57
+
58
+
59
def _nested_nodes(node_keys: list[str]):
    """Build a reducer that links every node with the nodes it references.

    For each node reference found inside *node*, the reducer records *node*
    under the referenced node's 'related' list; when the referenced node is
    part of the current file (its key appears in *node_keys*), the reference
    is also recorded under *node*'s own 'nested' list.
    """
    def exec(group: dict, node: dict):
        for nested_node in _find_nested_nodes(list(node.values())):
            nested_key = _node_key(nested_node)
            nested_entry = group.setdefault(nested_key, {})
            nested_entry[CACHE_RELATED_KEY] = nested_entry.get(CACHE_RELATED_KEY, []) + [
                _linked_node(node)
            ]

            # cache nodes that the current node refers to (nesting)
            if nested_key in node_keys:
                parent_key = _node_key(node)
                parent_entry = group.setdefault(parent_key, {})
                parent_entry[CACHE_NESTED_KEY] = parent_entry.get(CACHE_NESTED_KEY, []) + [
                    _linked_node(nested_node)
                ]

        return group
    return exec
80
+
81
+
82
def _cache_related_nodes(nodes: list):
    """Attach 'related' and 'nested' node references to each node's cache."""
    # restrict linking to nodes actually present in the file
    keys = [_node_key(node) for node in nodes]
    # node key -> {'related': [...], 'nested': [...]}
    mapping = reduce(_nested_nodes(keys), nodes, {})

    def with_cache(node: dict):
        entry = mapping.get(_node_key(node), {})
        cache = node.get(CACHE_KEY, {}) | {
            CACHE_RELATED_KEY: entry.get(CACHE_RELATED_KEY) or [],
            CACHE_NESTED_KEY: entry.get(CACHE_NESTED_KEY) or []
        }
        return node | {CACHE_KEY: cache}

    return [with_cache(node) for node in nodes]
100
+
101
+
102
def _cache_sources(nodes: list):
    """Store the shared list of sources in the cache of every cacheable node."""
    sources = find_sources()

    def with_sources(node: dict):
        node_type = node.get('type', node.get('@type'))
        extra = {
            CACHE_KEY: node.get(CACHE_KEY, {}) | {CACHE_SOURCES_KEY: sources}
        } if node_type in _CACHE_NODE_TYPES else {}
        return node | extra

    return [with_sources(node) for node in nodes]
110
+
111
+
112
def _safe_cache_sites(sites: list, years: list):
    """Cache geospatial data for *sites*, falling back to per-site calls on quota errors.

    Any other error is re-raised unchanged.
    """
    try:
        return cache_sites(sites, years)
    except Exception as e:
        # fixed typo: "occured" -> "occurred"
        logger.error(f"An error occurred while caching nodes on EE: {str(e)}")
        if 'exceeded' in str(e):
            logger.debug('Fallback to caching sites one by one')
            # run one by one in case the batching does not work
            return flatten([cache_sites([site], years) for site in sites])
        # bare raise keeps the original traceback intact (idiomatic vs `raise e`)
        raise
123
+
124
+
125
def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
    """Cache geospatial data for every Site in *nodes*, processed in batches.

    Returns all nodes, with cached sites replacing the original ones.
    """
    start = current_time_ms()

    # index every node by its key so cached sites can be swapped in place
    by_key = {_node_key(node): node for node in nodes}

    years = _years_from_cycles(nodes) if _ENABLE_CACHE_YEARS else []
    pending_sites = _filter_by_type(nodes, 'Site')

    while pending_sites:
        batch = _pop_items(pending_sites, batch_size)
        logger.info(f"Processing {len(batch)} sites / {len(pending_sites)} remaining.")
        for cached_site in _safe_cache_sites(batch, years):
            by_key[_node_key(cached_site)] = cached_site

    logger.info(f"Done caching sites in {current_time_ms() - start} ms")

    # replace original sites with new cached sites
    return list(by_key.values())
145
+
146
+
147
def run(nodes: list):
    """Cache geospatial data, node relations and sources for *nodes*."""
    init_gee()

    # 1. cache sites data
    result = _cache_sites(nodes)

    # 2. cache related nodes (optional, controlled by env flag)
    if _ENABLE_CACHE_RELATED_NODES:
        result = _cache_related_nodes(result)

    # 3. cache sources
    return _cache_sources(result)
@@ -81,7 +81,7 @@ def _run_values(
81
81
  site_cache = merge(
82
82
  site.get(CACHE_KEY, {}),
83
83
  {CACHE_GEOSPATIAL_KEY: cached_data},
84
- ({CACHE_YEARS_KEY: list(set(cached_value(site, CACHE_YEARS_KEY, []) + years))} if years else {})
84
+ ({CACHE_YEARS_KEY: sorted(list(set(cached_value(site, CACHE_YEARS_KEY, []) + years)))} if years else {})
85
85
  )
86
86
  return merge(site, {CACHE_KEY: site_cache})
87
87
 
@@ -2054,36 +2054,6 @@
2054
2054
  },
2055
2055
  "stage": 2
2056
2056
  },
2057
- {
2058
- "key": "emissions",
2059
- "model": "ipcc2006",
2060
- "value": "n2OToAirOrganicSoilCultivationDirect",
2061
- "runStrategy": "add_blank_node_if_missing",
2062
- "runArgs": {
2063
- "runNonMeasured": true,
2064
- "runNonAddedTerm": true
2065
- },
2066
- "mergeStrategy": "list",
2067
- "mergeArgs": {
2068
- "replaceThreshold": ["value", 0.01]
2069
- },
2070
- "stage": 2
2071
- },
2072
- {
2073
- "key": "emissions",
2074
- "model": "ipcc2006",
2075
- "value": "co2ToAirOrganicSoilCultivation",
2076
- "runStrategy": "add_blank_node_if_missing",
2077
- "runArgs": {
2078
- "runNonMeasured": true,
2079
- "runNonAddedTerm": true
2080
- },
2081
- "mergeStrategy": "list",
2082
- "mergeArgs": {
2083
- "replaceThreshold": ["value", 0.01]
2084
- },
2085
- "stage": 2
2086
- },
2087
2057
  {
2088
2058
  "key": "emissions",
2089
2059
  "model": "ipcc2006",
@@ -88,6 +88,14 @@
88
88
  "runStrategy": "add_blank_node_if_missing",
89
89
  "mergeStrategy": "list",
90
90
  "stage": 1
91
+ },
92
+ {
93
+ "key": "measurements",
94
+ "model": "hestia",
95
+ "value": "histosol",
96
+ "runStrategy": "add_blank_node_if_missing",
97
+ "mergeStrategy": "list",
98
+ "stage": 1
91
99
  }
92
100
  ],
93
101
  [
@@ -5,17 +5,19 @@ from hestia_earth.utils.tools import non_empty_list
5
5
 
6
6
  from hestia_earth.models.log import logger
7
7
 
8
- CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
9
- _FILEPATH = os.getenv('ECOINVENT_V3_FILEPATH', f"{os.path.join(CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
8
+ _CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
9
+ _ENV_NAME = 'ECOINVENT_V3_FILEPATH'
10
10
 
11
11
 
12
12
  @lru_cache()
13
13
  def _get_file():
14
- if not os.path.exists(_FILEPATH):
15
- logger.warning('Ecoinvent file not found. Please make sure to set env variable "ECOINVENT_V3_FILEPATH".')
14
+ filepath = os.getenv(_ENV_NAME, f"{os.path.join(_CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
15
+
16
+ if not os.path.exists(filepath):
17
+ logger.warning('Ecoinvent file not found. Please make sure to set env variable "%s".', _ENV_NAME)
16
18
  return None
17
19
 
18
- return load_lookup(filepath=_FILEPATH, keep_in_memory=True)
20
+ return load_lookup(filepath=filepath, keep_in_memory=True)
19
21
 
20
22
 
21
23
  def ecoinventV3_emissions(ecoinventName: str):
@@ -5,7 +5,11 @@ from hestia_earth.utils.tools import flatten, list_sum
5
5
  from hestia_earth.models.log import debugValues, logShouldRun, logRequirements
6
6
  from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions
7
7
  from hestia_earth.models.utils.emission import _new_emission
8
- from hestia_earth.models.utils.background_emissions import get_background_inputs, no_gap_filled_background_emissions
8
+ from hestia_earth.models.utils.background_emissions import (
9
+ get_background_inputs,
10
+ no_gap_filled_background_emissions,
11
+ log_missing_emissions
12
+ )
9
13
  from hestia_earth.models.utils.blank_node import group_by_keys
10
14
  from hestia_earth.models.utils.pesticideAI import get_pesticides_from_inputs
11
15
  from hestia_earth.models.utils.fertiliser import get_fertilisers_from_inputs
@@ -47,6 +51,7 @@ RETURNS = {
47
51
  }]
48
52
  }
49
53
  LOOKUPS = {
54
+ "emission": "inputProductionGroupId",
50
55
  "electricity": "ecoinventMapping",
51
56
  "fuel": "ecoinventMapping",
52
57
  "inorganicFertiliser": "ecoinventMapping",
@@ -97,6 +102,7 @@ def _add_emission(cycle: dict, input: dict):
97
102
 
98
103
  def _run_input(cycle: dict):
99
104
  no_gap_filled_background_emissions_func = no_gap_filled_background_emissions(cycle)
105
+ log_missing_emissions_func = log_missing_emissions(cycle, model=MODEL, methodTier=TIER)
100
106
 
101
107
  def run(inputs: list):
102
108
  input = inputs[0]
@@ -118,6 +124,7 @@ def _run_input(cycle: dict):
118
124
  logShouldRun(cycle, MODEL, input_term_id, should_run, methodTier=TIER)
119
125
 
120
126
  grouped_emissions = reduce(_add_emission(cycle, input), mappings, {}) if should_run else {}
127
+ log_missing_emissions_func(input_term_id, list(grouped_emissions.keys()))
121
128
  return [
122
129
  _emission(term_id, value * input_value, input)
123
130
  for term_id, value in grouped_emissions.items()
@@ -1,7 +1,7 @@
1
- from hestia_earth.schema import MeasurementMethodClassification, TermTermType
1
+ from hestia_earth.schema import MeasurementMethodClassification
2
2
 
3
3
  from hestia_earth.models.log import logRequirements, logShouldRun
4
- from hestia_earth.models.utils.measurement import _new_measurement
4
+ from hestia_earth.models.utils.measurement import _new_measurement, total_other_soilType_value
5
5
  from hestia_earth.models.utils.source import get_source
6
6
  from .utils import download, has_geospatial_data, should_download
7
7
  from . import MODEL
@@ -14,7 +14,13 @@ REQUIREMENTS = {
14
14
  {"region": {"@type": "Term", "termType": "region"}}
15
15
  ],
16
16
  "none": {
17
- "measurements": [{"@type": "Measurement", "value": "", "term.termType": "soilType"}]
17
+ "measurements": [{
18
+ "@type": "Measurement",
19
+ "value": "100",
20
+ "depthUpper": "0",
21
+ "depthLower": "30",
22
+ "term.termType": "soilType"
23
+ }]
18
24
  }
19
25
  }
20
26
  }
@@ -50,17 +56,18 @@ def _run(site: dict):
50
56
 
51
57
 
52
58
  def _should_run(site: dict):
53
- measurements = site.get('measurements', [])
54
- no_soil_type = all([m.get('term', {}).get('termType') != TermTermType.SOILTYPE.value for m in measurements])
55
59
  contains_geospatial_data = has_geospatial_data(site)
56
60
  below_max_area_size = should_download(TERM_ID, site)
57
61
 
62
+ total_measurements_value = total_other_soilType_value(site.get('measurements', []), TERM_ID)
63
+
58
64
  logRequirements(site, model=MODEL, term=TERM_ID,
59
65
  contains_geospatial_data=contains_geospatial_data,
60
66
  below_max_area_size=below_max_area_size,
61
- no_soil_type=no_soil_type)
67
+ total_soilType_measurements_value=total_measurements_value,
68
+ total_soilType_measurements_value_is_0=total_measurements_value == 0)
62
69
 
63
- should_run = all([contains_geospatial_data, below_max_area_size, no_soil_type])
70
+ should_run = all([contains_geospatial_data, below_max_area_size, total_measurements_value == 0])
64
71
  logShouldRun(site, MODEL, TERM_ID, should_run)
65
72
  return should_run
66
73
 
@@ -105,9 +105,9 @@ def _run(cycle: dict, total_values: list):
105
105
  term_id = model.get('product')
106
106
  value = _run_model(model, cycle, total_value)
107
107
  debugValues(cycle, model=MODEL, term=term_id,
108
- total_value=total_value,
109
- remaining_value=remaining_value,
110
- value=value)
108
+ total_above_ground_crop_residue=total_value,
109
+ remaining_crop_residue_value=remaining_value,
110
+ allocated_value=value)
111
111
 
112
112
  if value == 0:
113
113
  values.extend([_product(term_id, value)])
@@ -0,0 +1,53 @@
1
+ from hestia_earth.schema import MeasurementMethodClassification
2
+
3
+ from hestia_earth.models.log import logRequirements, logShouldRun
4
+ from hestia_earth.models.utils.measurement import _new_measurement, total_other_soilType_value
5
+ from . import MODEL
6
+
7
+ REQUIREMENTS = {
8
+ "Site": {
9
+ "measurements": [{
10
+ "@type": "Measurement",
11
+ "value": "100",
12
+ "depthUpper": "0",
13
+ "depthLower": "30",
14
+ "term.termType": "soilType"
15
+ }]
16
+ }
17
+ }
18
+ RETURNS = {
19
+ "Measurement": [{
20
+ "value": "0",
21
+ "depthUpper": "0",
22
+ "depthLower": "30",
23
+ "methodClassification": "modelled using other measurements"
24
+ }]
25
+ }
26
+ LOOKUPS = {
27
+ "soilType": "sumMax100Group"
28
+ }
29
+ TERM_ID = 'histosol'
30
+
31
+
32
def _measurement():
    """Build a histosol measurement of 0% over the 0-30cm depth interval."""
    measurement = _new_measurement(TERM_ID)
    measurement.update({
        'value': [0],
        'depthUpper': 0,
        'depthLower': 30,
        'methodClassification': MeasurementMethodClassification.MODELLED_USING_OTHER_MEASUREMENTS.value
    })
    return measurement
39
+
40
+
41
def _should_run(site: dict):
    """Run only when the other soilType measurements already total 100%."""
    total_measurements_value = total_other_soilType_value(site.get('measurements', []), TERM_ID)

    logRequirements(site, model=MODEL, term=TERM_ID,
                    total_soilType_measurements_value=total_measurements_value,
                    total_soilType_measurements_value_is_100=total_measurements_value == 100)

    # single condition: the `all([...])` wrapper was redundant
    should_run = total_measurements_value == 100
    logShouldRun(site, MODEL, TERM_ID, should_run)
    return should_run
51
+
52
+
53
def run(site: dict):
    """Return the gap-filled histosol measurement when the site qualifies."""
    if not _should_run(site):
        return []
    return [_measurement()]
@@ -98,6 +98,23 @@ def _run(cycle: dict, economicValueShare: float, total_yield: float, seed_input:
98
98
  ]
99
99
 
100
100
 
101
def _map_group_emissions(group_id: str, required_emission_term_ids: list, emission_ids: list):
    """Summarise, for one input-production group, which required emissions are present.

    Returns a dict with the group id, emission counters, a '-'-joined list of
    missing emission term ids and an 'is-valid' flag (True when none missing).
    """
    lookup = download_lookup('emission.csv')
    emissions = [
        term_id for term_id in find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
        if term_id in required_emission_term_ids
    ]
    # set gives O(1) membership tests; iteration order of `emissions` is preserved
    present_ids = set(emission_ids)
    included_emissions = [v for v in emissions if v in present_ids]
    missing_emissions = [v for v in emissions if v not in present_ids]
    return {
        'id': group_id,
        'total-emissions': len(emissions),
        'included-emissions': len(included_emissions),
        'missing-emissions': '-'.join(missing_emissions),
        'is-valid': len(emissions) == len(included_emissions)
    }
116
+
117
+
101
118
  def _filter_emissions(cycle: dict):
102
119
  required_emission_term_ids = cycle_emissions_in_system_boundary(cycle)
103
120
 
@@ -117,31 +134,12 @@ def _filter_emissions(cycle: dict):
117
134
  group_ids = set([v.get('group-id') for v in emissions if v.get('group-id')])
118
135
 
119
136
  # for each group, get the list of all required emissions
120
- lookup = download_lookup('emission.csv')
121
137
  emissions_per_group = [
122
- {
123
- 'id': group_id,
124
- 'emissions': list(filter(
125
- lambda id: id in required_emission_term_ids,
126
- find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
127
- ))
128
- }
138
+ _map_group_emissions(group_id, required_emission_term_ids, emission_ids)
129
139
  for group_id in group_ids
130
140
  ]
131
- emissions_per_group = [
132
- {
133
- 'id': group.get('id'),
134
- 'total-emissions': len(group.get('emissions', [])),
135
- 'included-emissions': len(list(filter(lambda v: v in emission_ids, group.get('emissions', [])))),
136
- 'missing-emissions': '-'.join(list(filter(lambda v: v not in emission_ids, group.get('emissions', []))))
137
- }
138
- for group in emissions_per_group
139
- ]
140
141
  # only keep groups that have all emissions present in the Cycle
141
- valid_groups = list(filter(
142
- lambda group: group.get('total-emissions') == group.get('included-emissions'),
143
- emissions_per_group
144
- ))
142
+ valid_groups = list(filter(lambda group: group.get('is-valid'), emissions_per_group))
145
143
  valid_group_ids = set([v.get('id') for v in valid_groups])
146
144
 
147
145
  # finally, only return emissions which groups are valid
@@ -273,6 +271,12 @@ def _should_run(cycle: dict):
273
271
 
274
272
  logShouldRun(cycle, MODEL, term_id, should_run, methodTier=TIER, model_key=MODEL_KEY)
275
273
 
274
+ # log missing emissions to show in the logs
275
+ for group in emissions_per_group:
276
+ if not group.get('is-valid'):
277
+ logShouldRun(cycle, MODEL, term_id, False,
278
+ methodTier=TIER, model_key=MODEL_KEY, emission_id=group.get('id'))
279
+
276
280
  return should_run, total_economicValueShare, total_yield, grouped_seed_inputs, grouped_emissions
277
281
 
278
282
 
@@ -3,17 +3,18 @@ from functools import reduce
3
3
  from numpy import empty_like, random, vstack
4
4
  from numpy.typing import NDArray
5
5
  from pydash.objects import merge
6
- from typing import Callable, Optional, Union
6
+ from typing import Callable, Literal, Optional, Union
7
7
 
8
8
  from hestia_earth.schema import MeasurementMethodClassification, SiteSiteType, TermTermType
9
- from hestia_earth.utils.model import find_term_match, filter_list_term_type
10
9
  from hestia_earth.utils.blank_node import get_node_value
10
+ from hestia_earth.utils.model import find_term_match, filter_list_term_type
11
+ from hestia_earth.utils.tools import non_empty_list
11
12
 
12
13
  from hestia_earth.models.utils import split_on_condition
13
14
  from hestia_earth.models.utils.array_builders import gen_seed
14
15
  from hestia_earth.models.utils.blank_node import (
15
- cumulative_nodes_match, cumulative_nodes_lookup_match, cumulative_nodes_term_match, node_lookup_match,
16
- node_term_match, group_nodes_by_year, validate_start_date_end_date
16
+ cumulative_nodes_match, cumulative_nodes_lookup_match, cumulative_nodes_term_match, group_by_term,
17
+ node_lookup_match, node_term_match, group_nodes_by_year, validate_start_date_end_date
17
18
  )
18
19
  from hestia_earth.models.utils.ecoClimateZone import EcoClimateZone, get_eco_climate_zone_value
19
20
  from hestia_earth.models.utils.descriptive_stats import calc_descriptive_stats
@@ -1031,8 +1032,8 @@ def _assign_ipcc_soil_category(
1031
1032
  IpccSoilCategory
1032
1033
  The assigned IPCC soil category.
1033
1034
  """
1034
- soil_types = filter_list_term_type(measurement_nodes, TermTermType.SOILTYPE)
1035
- usda_soil_types = filter_list_term_type(measurement_nodes, TermTermType.USDASOILTYPE)
1035
+ soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.SOILTYPE)
1036
+ usda_soil_types = _get_soil_type_measurements(measurement_nodes, TermTermType.USDASOILTYPE)
1036
1037
 
1037
1038
  clay_content = get_node_value(find_term_match(measurement_nodes, _CLAY_CONTENT_TERM_ID))
1038
1039
  sand_content = get_node_value(find_term_match(measurement_nodes, _SAND_CONTENT_TERM_ID))
@@ -1053,6 +1054,20 @@ def _assign_ipcc_soil_category(
1053
1054
  ) if len(soil_types) > 0 or len(usda_soil_types) > 0 else default
1054
1055
 
1055
1056
 
1057
def _get_soil_type_measurements(
    nodes: list[dict], term_type: Literal[TermTermType.SOILTYPE, TermTermType.USDASOILTYPE]
) -> list[dict]:
    """For each soil-type term, keep the single measurement whose depth interval
    is closest to the target (DEPTH_UPPER, DEPTH_LOWER) interval."""
    grouped = group_by_term(filter_list_term_type(nodes, term_type))

    def depth_distance(node):
        # missing depths default to the full 0-100 interval
        upper = node.get("depthUpper", 0)
        lower = node.get("depthLower", 100)
        return abs(upper - DEPTH_UPPER) + abs(lower - DEPTH_LOWER)

    closest = [
        min(group_nodes, key=depth_distance)
        for group_nodes in grouped.values()
        if group_nodes
    ]
    return non_empty_list(closest)
1069
+
1070
+
1056
1071
  def _check_soil_category(
1057
1072
  *,
1058
1073
  key: IpccSoilCategory,
@@ -1461,7 +1476,7 @@ Value: Corresponding decision tree for IPCC management categories based on land
1461
1476
  """
1462
1477
 
1463
1478
  _IPCC_LAND_USE_CATEGORY_TO_DEFAULT_IPCC_MANAGEMENT_CATEGORY = {
1464
- IpccLandUseCategory.GRASSLAND: IpccManagementCategory.NOMINALLY_MANAGED,
1479
+ IpccLandUseCategory.GRASSLAND: IpccManagementCategory.UNKNOWN,
1465
1480
  IpccLandUseCategory.ANNUAL_CROPS_WET: IpccManagementCategory.UNKNOWN,
1466
1481
  IpccLandUseCategory.ANNUAL_CROPS: IpccManagementCategory.UNKNOWN
1467
1482
  }