hestia-earth-models 0.70.6__py3-none-any.whl → 0.71.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/models/cache_nodes.py +157 -0
- hestia_earth/models/cache_sites.py +1 -1
- hestia_earth/models/config/Cycle.json +0 -30
- hestia_earth/models/data/ecoinventV3/__init__.py +7 -5
- hestia_earth/models/ecoinventV3/__init__.py +8 -1
- hestia_earth/models/hestia/aboveGroundCropResidue.py +3 -3
- hestia_earth/models/hestia/seed_emissions.py +25 -21
- hestia_earth/models/mocking/search-results.json +1509 -1505
- hestia_earth/models/utils/background_emissions.py +24 -0
- hestia_earth/models/utils/pesticideAI.py +1 -1
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/METADATA +2 -2
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/RECORD +17 -19
- tests/models/test_cache_nodes.py +31 -0
- hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +0 -100
- hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +0 -99
- tests/models/ipcc2006/test_co2ToAirOrganicSoilCultivation.py +0 -49
- tests/models/ipcc2006/test_n2OToAirOrganicSoilCultivationDirect.py +0 -32
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.70.6.dist-info → hestia_earth_models-0.71.0.dist-info}/top_level.txt +0 -0
hestia_earth/models/cache_nodes.py
@@ -0,0 +1,157 @@
+import os
+from functools import reduce
+from hestia_earth.schema import NodeType
+from hestia_earth.utils.tools import current_time_ms, flatten
+from hestia_earth.earth_engine import init_gee
+
+from .log import logger
+from .utils import CACHE_KEY
+from .utils.site import years_from_cycles
+from .utils.source import CACHE_SOURCES_KEY, find_sources
+from .cache_sites import run as cache_sites
+
+CACHE_RELATED_KEY = 'related'
+CACHE_NESTED_KEY = 'nested'
+
+_CACHE_BATCH_SIZE = int(os.getenv('CACHE_SITES_BATCH_SIZE', '5000'))
+_ENABLE_CACHE_YEARS = os.getenv('ENABLE_CACHE_YEARS', 'true') == 'true'
+_ENABLE_CACHE_RELATED_NODES = os.getenv('ENABLE_CACHE_RELATED_NODES', 'true') == 'true'
+_CACHE_NODE_TYPES = [
+    NodeType.SITE.value,
+    NodeType.CYCLE.value,
+    NodeType.IMPACTASSESSMENT.value
+]
+
+
+def _pop_items(values: list, nb_items: int):
+    if len(values) < nb_items:
+        removed_items = values[:]  # Get a copy of the entire array
+        values.clear()  # Remove all items from the original array
+    else:
+        removed_items = values[:nb_items]  # Get the first N items
+        del values[:nb_items]  # Remove the first N items from the original array
+
+    return removed_items
+
+
+def _filter_by_type(nodes: list, type: str): return [n for n in nodes if n.get('@type', n.get('type')) == type]
+
+
+def _node_key(node: dict): return '/'.join([node.get('type', node.get('@type')), node.get('id', node.get('@id'))])
+
+
+def _years_from_cycles(nodes: dict): return years_from_cycles(_filter_by_type(nodes, NodeType.CYCLE.value))
+
+
+def _linked_node(data: dict): return {'type': data.get('type'), 'id': data.get('id')}
+
+
+def _find_nested_nodes(data) -> list[dict]:
+    if isinstance(data, dict):
+        if data.get('type') in _CACHE_NODE_TYPES and data.get('id'):
+            return [_linked_node(data)]
+        return flatten(_find_nested_nodes(list(data.values())))
+    if isinstance(data, list):
+        return flatten(map(_find_nested_nodes, data))
+    return []
+
+
+def _nested_nodes(node_keys: list[str]):
+    def exec(group: dict, node: dict):
+        nested_nodes = _find_nested_nodes(list(node.values()))
+
+        for nested_node in nested_nodes:
+            group_id = _node_key(nested_node)
+            group[group_id] = group.get(group_id, {})
+            group[group_id][CACHE_RELATED_KEY] = group.get(group_id, {}).get(CACHE_RELATED_KEY, []) + [
+                _linked_node(node)
+            ]
+
+            # cache nodes that the current node refers to (nesting)
+            if group_id in node_keys:
+                group_id = _node_key(node)
+                group[group_id] = group.get(group_id, {})
+                group[group_id][CACHE_NESTED_KEY] = group.get(group_id, {}).get(CACHE_NESTED_KEY, []) + [
+                    _linked_node(nested_node)
+                ]
+
+        return group
+    return exec
+
+
+def _cache_related_nodes(nodes: list):
+    # only cache nodes included in the file
+    nodes_keys = list(map(_node_key, nodes))
+    # for each node, compile list of nested nodes
+    nested_nodes_mapping = reduce(_nested_nodes(nodes_keys), nodes, {})
+
+    def cache_related_node(node: dict):
+        nodes_mapping = nested_nodes_mapping.get(_node_key(node), {})
+        related_nodes = nodes_mapping.get(CACHE_RELATED_KEY) or []
+        nested_nodes = nodes_mapping.get(CACHE_NESTED_KEY) or []
+        # save in cache
+        cached_data = node.get(CACHE_KEY, {}) | {
+            CACHE_RELATED_KEY: related_nodes,
+            CACHE_NESTED_KEY: nested_nodes
+        }
+        return node | {CACHE_KEY: cached_data}
+
+    return list(map(cache_related_node, nodes))
+
+
+def _cache_sources(nodes: list):
+    sources = find_sources()
+    return [
+        n | ({
+            CACHE_KEY: n.get(CACHE_KEY, {}) | {CACHE_SOURCES_KEY: sources}
+        } if n.get('type', n.get('@type')) in _CACHE_NODE_TYPES else {})
+        for n in nodes
+    ]
+
+
+def _safe_cache_sites(sites: list, years: list):
+    try:
+        return cache_sites(sites, years)
+    except Exception as e:
+        logger.error(f"An error occurred while caching nodes on EE: {str(e)}")
+        if 'exceeded' in str(e):
+            logger.debug('Fallback to caching sites one by one')
+            # run one by one in case the batching does not work
+            return flatten([cache_sites([site], years) for site in sites])
+        else:
+            raise e
+
+
+def _cache_sites(nodes: list, batch_size: int = _CACHE_BATCH_SIZE):
+    start = current_time_ms()
+
+    # build list of nodes by key to update as sites are processed
+    nodes_mapping = {_node_key(n): n for n in nodes}
+
+    years = _years_from_cycles(nodes) if _ENABLE_CACHE_YEARS else []
+    sites = _filter_by_type(nodes, 'Site')
+
+    while len(sites) > 0:
+        batch_values = _pop_items(sites, batch_size)
+        logger.info(f"Processing {len(batch_values)} sites / {len(sites)} remaining.")
+        results = _safe_cache_sites(batch_values, years)
+        for result in results:
+            nodes_mapping[_node_key(result)] = result
+
+    logger.info(f"Done caching sites in {current_time_ms() - start} ms")
+
+    # replace original sites with new cached sites
+    return list(nodes_mapping.values())
+
+
+def run(nodes: list):
+    init_gee()
+
+    # cache sites data
+    cached_nodes = _cache_sites(nodes)
+
+    # cache related nodes
+    cached_nodes = _cache_related_nodes(cached_nodes) if _ENABLE_CACHE_RELATED_NODES else cached_nodes
+
+    # cache sources
+    return _cache_sources(cached_nodes)
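The new cache_nodes module batches Sites through cache_sites, then attaches related/nested node links and the shared sources under CACHE_KEY. A minimal usage sketch (the node payloads are invented, and configured Google Earth Engine credentials are assumed since run() calls init_gee()):

from hestia_earth.models.cache_nodes import run

# Illustrative nodes only; real Site/Cycle nodes carry many more fields.
nodes = [
    {'type': 'Site', 'id': 'site-1', 'siteType': 'cropland'},
    {'type': 'Cycle', 'id': 'cycle-1', 'site': {'type': 'Site', 'id': 'site-1'}}
]

# Each returned node of a cached type gains an entry under CACHE_KEY holding
# its 'related' nodes, its 'nested' nodes and the shared sources.
cached_nodes = run(nodes)

Batch size and the optional steps are driven by the CACHE_SITES_BATCH_SIZE, ENABLE_CACHE_YEARS and ENABLE_CACHE_RELATED_NODES environment variables defined at the top of the module.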
hestia_earth/models/cache_sites.py
@@ -81,7 +81,7 @@ def _run_values(
     site_cache = merge(
         site.get(CACHE_KEY, {}),
         {CACHE_GEOSPATIAL_KEY: cached_data},
-        ({CACHE_YEARS_KEY: list(set(cached_value(site, CACHE_YEARS_KEY, []) + years))} if years else {})
+        ({CACHE_YEARS_KEY: sorted(list(set(cached_value(site, CACHE_YEARS_KEY, []) + years)))} if years else {})
     )
     return merge(site, {CACHE_KEY: site_cache})
 
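The only functional change in cache_sites.py is wrapping the merged years in sorted(...), which makes the cached list deterministic instead of dependent on set iteration order. A tiny illustration:

# Duplicate years are dropped by set(); sorted() then fixes the order,
# so repeated runs cache an identical list.
existing_years = [2021, 2019]
new_years = [2020, 2019]
sorted(list(set(existing_years + new_years)))  # -> [2019, 2020, 2021]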
hestia_earth/models/config/Cycle.json
@@ -2054,36 +2054,6 @@
       },
       "stage": 2
     },
-    {
-      "key": "emissions",
-      "model": "ipcc2006",
-      "value": "n2OToAirOrganicSoilCultivationDirect",
-      "runStrategy": "add_blank_node_if_missing",
-      "runArgs": {
-        "runNonMeasured": true,
-        "runNonAddedTerm": true
-      },
-      "mergeStrategy": "list",
-      "mergeArgs": {
-        "replaceThreshold": ["value", 0.01]
-      },
-      "stage": 2
-    },
-    {
-      "key": "emissions",
-      "model": "ipcc2006",
-      "value": "co2ToAirOrganicSoilCultivation",
-      "runStrategy": "add_blank_node_if_missing",
-      "runArgs": {
-        "runNonMeasured": true,
-        "runNonAddedTerm": true
-      },
-      "mergeStrategy": "list",
-      "mergeArgs": {
-        "replaceThreshold": ["value", 0.01]
-      },
-      "stage": 2
-    },
     {
       "key": "emissions",
       "model": "ipcc2006",
hestia_earth/models/data/ecoinventV3/__init__.py
@@ -5,17 +5,19 @@ from hestia_earth.utils.tools import non_empty_list
 
 from hestia_earth.models.log import logger
 
-
-
+_CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
+_ENV_NAME = 'ECOINVENT_V3_FILEPATH'
 
 
 @lru_cache()
 def _get_file():
-
-
+    filepath = os.getenv(_ENV_NAME, f"{os.path.join(_CURRENT_DIR, 'ecoinventV3_excerpt')}.csv")
+
+    if not os.path.exists(filepath):
+        logger.warning('Ecoinvent file not found. Please make sure to set env variable "%s".', _ENV_NAME)
         return None
 
-    return load_lookup(filepath=
+    return load_lookup(filepath=filepath, keep_in_memory=True)
 
 
 def ecoinventV3_emissions(ecoinventName: str):
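The loader now resolves the lookup path from the ECOINVENT_V3_FILEPATH environment variable, falls back to a bundled ecoinventV3_excerpt.csv, and warns when the file is missing. A usage sketch (the path and activity name are illustrative):

import os

# Must be set before the first call: _get_file() is wrapped in @lru_cache(),
# so the resolved path is memoised. The path here is an example only.
os.environ['ECOINVENT_V3_FILEPATH'] = '/data/lookups/ecoinventV3_full.csv'

from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions

# Returns the emissions mapped to an ecoinvent activity name; the name is
# invented and the return shape is not shown in this diff.
emissions = ecoinventV3_emissions('market for diesel')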
hestia_earth/models/ecoinventV3/__init__.py
@@ -5,7 +5,11 @@ from hestia_earth.utils.tools import flatten, list_sum
 from hestia_earth.models.log import debugValues, logShouldRun, logRequirements
 from hestia_earth.models.data.ecoinventV3 import ecoinventV3_emissions
 from hestia_earth.models.utils.emission import _new_emission
-from hestia_earth.models.utils.background_emissions import
+from hestia_earth.models.utils.background_emissions import (
+    get_background_inputs,
+    no_gap_filled_background_emissions,
+    log_missing_emissions
+)
 from hestia_earth.models.utils.blank_node import group_by_keys
 from hestia_earth.models.utils.pesticideAI import get_pesticides_from_inputs
 from hestia_earth.models.utils.fertiliser import get_fertilisers_from_inputs
@@ -47,6 +51,7 @@ RETURNS = {
     }]
 }
 LOOKUPS = {
+    "emission": "inputProductionGroupId",
     "electricity": "ecoinventMapping",
     "fuel": "ecoinventMapping",
     "inorganicFertiliser": "ecoinventMapping",
@@ -97,6 +102,7 @@ def _add_emission(cycle: dict, input: dict):
 
 def _run_input(cycle: dict):
     no_gap_filled_background_emissions_func = no_gap_filled_background_emissions(cycle)
+    log_missing_emissions_func = log_missing_emissions(cycle, model=MODEL, methodTier=TIER)
 
     def run(inputs: list):
         input = inputs[0]
@@ -118,6 +124,7 @@ def _run_input(cycle: dict):
         logShouldRun(cycle, MODEL, input_term_id, should_run, methodTier=TIER)
 
         grouped_emissions = reduce(_add_emission(cycle, input), mappings, {}) if should_run else {}
+        log_missing_emissions_func(input_term_id, list(grouped_emissions.keys()))
         return [
             _emission(term_id, value * input_value, input)
             for term_id, value in grouped_emissions.items()
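log_missing_emissions is implemented in hestia_earth/models/utils/background_emissions.py (+24 lines, not shown in this diff). From the two call sites above it is a curried helper: called once with the cycle and the logging context, it returns a function taking an input term id and the emission term ids that were actually grouped. A sketch of that inferred shape, purely an assumption:

# Hypothetical shape inferred from the call sites; the real body is not
# part of this diff.
def log_missing_emissions(cycle: dict, model: str, methodTier: str):
    def log(input_term_id: str, grouped_emission_term_ids: list):
        # presumably compares the grouped ids against the ids required by
        # the new "emission" -> "inputProductionGroupId" lookup and logs
        # whichever are missing
        ...
    return log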
hestia_earth/models/hestia/aboveGroundCropResidue.py
@@ -105,9 +105,9 @@ def _run(cycle: dict, total_values: list):
         term_id = model.get('product')
         value = _run_model(model, cycle, total_value)
         debugValues(cycle, model=MODEL, term=term_id,
-
-
-
+                    total_above_ground_crop_residue=total_value,
+                    remaining_crop_residue_value=remaining_value,
+                    allocated_value=value)
 
         if value == 0:
             values.extend([_product(term_id, value)])
hestia_earth/models/hestia/seed_emissions.py
@@ -98,6 +98,23 @@ def _run(cycle: dict, economicValueShare: float, total_yield: float, seed_input:
     ]
 
 
+def _map_group_emissions(group_id: str, required_emission_term_ids: list, emission_ids: list):
+    lookup = download_lookup('emission.csv')
+    emissions = list(filter(
+        lambda id: id in required_emission_term_ids,
+        find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
+    ))
+    included_emissions = list(filter(lambda v: v in emission_ids, emissions))
+    missing_emissions = list(filter(lambda v: v not in emission_ids, emissions))
+    return {
+        'id': group_id,
+        'total-emissions': len(emissions),
+        'included-emissions': len(included_emissions),
+        'missing-emissions': '-'.join(missing_emissions),
+        'is-valid': len(emissions) == len(included_emissions)
+    }
+
+
 def _filter_emissions(cycle: dict):
     required_emission_term_ids = cycle_emissions_in_system_boundary(cycle)
 
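_map_group_emissions collapses what were two passes over each group into a single summary dict and introduces the 'is-valid' flag consumed below. An illustrative input/output pair (all term ids invented; the real ones come from the inputProductionGroupId column of emission.csv):

# Suppose the lookup yields two required emissions for the group but only
# one is present on the Cycle:
#   _map_group_emissions(
#       'dieselGroup',
#       required_emission_term_ids=['co2ToAirInputsProduction', 'n2OToAirInputsProductionDirect'],
#       emission_ids=['co2ToAirInputsProduction']
#   )
# would return:
#   {'id': 'dieselGroup', 'total-emissions': 2, 'included-emissions': 1,
#    'missing-emissions': 'n2OToAirInputsProductionDirect', 'is-valid': False}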
@@ -117,31 +134,12 @@ def _filter_emissions(cycle: dict):
     group_ids = set([v.get('group-id') for v in emissions if v.get('group-id')])
 
     # for each group, get the list of all required emissions
-    lookup = download_lookup('emission.csv')
     emissions_per_group = [
-        {
-            'id': group_id,
-            'emissions': list(filter(
-                lambda id: id in required_emission_term_ids,
-                find_term_ids_by(lookup, column_name('inputProductionGroupId'), group_id)
-            ))
-        }
+        _map_group_emissions(group_id, required_emission_term_ids, emission_ids)
         for group_id in group_ids
     ]
-    emissions_per_group = [
-        {
-            'id': group.get('id'),
-            'total-emissions': len(group.get('emissions', [])),
-            'included-emissions': len(list(filter(lambda v: v in emission_ids, group.get('emissions', [])))),
-            'missing-emissions': '-'.join(list(filter(lambda v: v not in emission_ids, group.get('emissions', []))))
-        }
-        for group in emissions_per_group
-    ]
     # only keep groups that have all emissions present in the Cycle
-    valid_groups = list(filter(
-        lambda group: group.get('total-emissions') == group.get('included-emissions'),
-        emissions_per_group
-    ))
+    valid_groups = list(filter(lambda group: group.get('is-valid'), emissions_per_group))
     valid_group_ids = set([v.get('id') for v in valid_groups])
 
     # finally, only return emissions which groups are valid
@@ -273,6 +271,12 @@ def _should_run(cycle: dict):
 
     logShouldRun(cycle, MODEL, term_id, should_run, methodTier=TIER, model_key=MODEL_KEY)
 
+    # log missing emissions to show in the logs
+    for group in emissions_per_group:
+        if not group.get('is-valid'):
+            logShouldRun(cycle, MODEL, term_id, False,
+                         methodTier=TIER, model_key=MODEL_KEY, emission_id=group.get('id'))
+
     return should_run, total_economicValueShare, total_yield, grouped_seed_inputs, grouped_emissions
 
 