PyPI - hestia-earth-models - Versions diffs - 0.65.6__py3-none-any.whl → 0.65.7__py3-none-any.whl - Mend

hestia-earth-models 0.65.6__py3-none-any.whl → 0.65.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

hestia_earth/models/version.py CHANGED Viewed

	@@ -1 +1 @@
1	- VERSION = '0.65.6'
1	+ VERSION = '0.65.7'

hestia_earth/orchestrator/__init__.py ADDED Viewed

@@ -0,0 +1,40 @@
+from pkgutil import extend_path
+from typing import Union, List
+from hestia_earth.utils.tools import current_time_ms
+from .log import logger
+from .models import run as run_models
+__path__ = extend_path(__path__, __name__)
+def _required(message):
+    """Abort by raising a generic `Exception` carrying `message`."""
+    raise Exception(message)
+def run(data: dict, configuration: dict, stage: Union[int, List[int]] = None) -> dict:
+    """
+    Runs a set of models on a Node.
+    Parameters
+    ----------
+    data : dict
+        Either a `Cycle`, a `Site` or an `ImpactAssessment`.
+    configuration : dict
+        Configuration data which defines the order of the models to run. Must contain a `models` key.
+    stage : int | list[int]
+        For multi-stage calculations, will filter models by "stage". Can pass a single or multiple stage.
+    Returns
+    -------
+    dict
+        The data with updated content
+    Raises
+    ------
+    Exception
+        If `data` has no `@type`/`type` key, or if `configuration` has no `models` key.
+    """
+    now = current_time_ms()
+    node_type = data.get('@type', data.get('type'))
+    node_id = data.get('@id', data.get('id'))
+    if node_type is None:
+        _required('Please provide an "@type" key in your data.')
+    if (configuration or {}).get('models') is None:
+        _required('Please provide a valid configuration.')
+    # build the optional suffix on its own: written inline, the conditional applied to the whole
+    # concatenation and an empty message was logged whenever the node had no id
+    id_suffix = f" with id: {node_id}" if node_id else ''
+    logger.info(f"Running models on {node_type}" + id_suffix)
+    data = run_models(data, configuration.get('models', []), stage=stage)
+    logger.info('time=%s, unit=ms', current_time_ms() - now)
+    return data

hestia_earth/orchestrator/log.py ADDED Viewed

@@ -0,0 +1,62 @@
+import os
+import sys
+import logging
+# minimum level for the orchestrator logger and its file handler; read once at import time
+LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
+# disable root logger
+root_logger = logging.getLogger()
+root_logger.disabled = True
+# create custom logger
+logger = logging.getLogger('hestia_earth.orchestrator')
+# NOTE(review): `sys.stdout` is a stream, not a `logging.Handler`, so this call looks like a no-op - confirm
+logger.removeHandler(sys.stdout)
+logger.setLevel(logging.getLevelName(LOG_LEVEL))
+def log_to_file(filepath: str):
+    """
+    Attach a UTF-8 file handler to the orchestrator logger.
+    Each record is written as a JSON-shaped line with the keys `timestamp`, `level`, `logger`,
+    `filename` and `message`. This is called automatically at import time when the environment
+    variable `LOG_FILENAME` is set; call it yourself with a file path otherwise.
+    Parameters
+    ----------
+    filepath : str
+        Path of the file.
+    """
+    level = logging.getLevelName(LOG_LEVEL)
+    record_format = (
+        '{"timestamp": "%(asctime)s", "level": "%(levelname)s", "logger": "%(name)s", '
+        '"filename": "%(filename)s", "message": "%(message)s"}'
+    )
+    file_handler = logging.FileHandler(filepath, encoding='utf-8')
+    file_handler.setFormatter(logging.Formatter(record_format, '%Y-%m-%dT%H:%M:%S%z'))
+    file_handler.setLevel(level)
+    logger.addHandler(file_handler)
+# when `LOG_FILENAME` is set in the environment, file logging is enabled as soon as this module is imported
+LOG_FILENAME = os.getenv('LOG_FILENAME')
+if LOG_FILENAME is not None:
+    log_to_file(LOG_FILENAME)
+def _join_args(**kwargs):
+    """Render the keyword arguments as one `key=value, key=value` string (empty when none are given)."""
+    pairs = (f"{name}={val}" for name, val in kwargs.items())
+    return ', '.join(pairs)
+def _log_node_suffix(node: dict = {}):
+    """
+    Build the `<type>=<id>, ` text placed in front of log messages.
+    The type is read from `@type` then `type` and lower-cased; the id is read from `@id`, then `id`,
+    then `term.@id`. Returns an empty string when the node is empty/None or has no type.
+    """
+    if not node:
+        return ''
+    node_type = node.get('@type', node.get('type'))
+    fallback_id = node.get('id', node.get('term', {}).get('@id'))
+    node_id = node.get('@id', fallback_id)
+    if not node_type:
+        return ''
+    return f"{node_type.lower()}={node_id}, "
+def debugValues(log_node: dict, **kwargs):
+    """Log the keyword arguments as `key=value` pairs at DEBUG level, prefixed with the node type and id."""
+    prefix = _log_node_suffix(log_node)
+    details = _join_args(**kwargs)
+    logger.debug(prefix + details)
+def logShouldRun(log_node: dict, model: str, term: str, should_run: bool, **kwargs):
+    """
+    Log at INFO level whether `model` should run for `term`.
+    Parameters
+    ----------
+    log_node : dict
+        Node used to build the `<type>=<id>, ` prefix of the message.
+    model : str
+        Name of the model.
+    term : str
+        Identifier of the term.
+    should_run : bool
+        The decision being logged.
+    **kwargs
+        Extra values appended to the message as `key=value` pairs.
+    """
+    extra = (', ' + _join_args(**kwargs)) if kwargs else ''
+    # hand the prefix and the extras to the logger as arguments instead of splicing them into the
+    # template: a literal "%" inside an id or a value would otherwise be parsed as a format directive
+    logger.info('%sshould_run=%s, model=%s, term=%s%s', _log_node_suffix(log_node), should_run, model, term, extra)
+def logShouldMerge(log_node: dict, model: str, term: str, should_merge: bool, **kwargs):
+    """
+    Log at INFO level whether the result of `model` should be merged for `term`.
+    Parameters
+    ----------
+    log_node : dict
+        Node used to build the `<type>=<id>, ` prefix of the message.
+    model : str
+        Name of the model.
+    term : str
+        Identifier of the term.
+    should_merge : bool
+        The decision being logged.
+    **kwargs
+        Extra values appended to the message as `key=value` pairs.
+    """
+    extra = (', ' + _join_args(**kwargs)) if kwargs else ''
+    # hand the prefix and the extras to the logger as arguments instead of splicing them into the
+    # template: a literal "%" inside an id or a value would otherwise be parsed as a format directive
+    logger.info('%sshould_merge=%s, model=%s, term=%s%s', _log_node_suffix(log_node), should_merge, model, term, extra)

hestia_earth/orchestrator/models/__init__.py ADDED Viewed

@@ -0,0 +1,118 @@
+import os
+from typing import Union, List
+import importlib
+from functools import reduce
+import concurrent.futures
+from copy import deepcopy
+from hestia_earth.utils.tools import non_empty_list
+from hestia_earth.models.version import VERSION
+from ..log import logger
+from ..utils import get_required_model_param, _snakecase
+from ..strategies.run import should_run
+from ..strategies.merge import merge
+def _max_workers(type: str):
+    """
+    Read the thread-pool size for a node type from the environment variable `MAX_WORKERS_<TYPE>`.
+    Returns `None` when the variable is unset, is not an integer, or `type` cannot be upper-cased.
+    """
+    try:
+        env_name = f"MAX_WORKERS_{type.upper()}"
+        raw_value = os.getenv(env_name)
+        return int(raw_value)
+    except Exception:
+        return None
+def _list_except_item(list, item):
+    """Return a copy of `list` without the first occurrence of `item` (raises `ValueError` if absent)."""
+    position = list.index(item)
+    before, after = list[:position], list[position + 1:]
+    return before + after
+def _filter_models_stage(models: list, stage: Union[int, List[int]] = None):
+    """
+    Keep only the models configured for the requested stage(s).
+    Without a `stage`, `models` is returned untouched. Otherwise dict entries whose `stage` is not
+    requested are dropped, nested lists are filtered recursively, and the result goes through
+    `non_empty_list`.
+    """
+    if stage is None:
+        return models
+    stages = stage if isinstance(stage, list) else [stage]
+    kept = []
+    for entry in models:
+        if isinstance(entry, dict) and entry.get('stage') not in stages:
+            continue
+        kept.append(_filter_models_stage(entry, stage) if isinstance(entry, list) else entry)
+    return non_empty_list(kept)
+def _import_model(name: str):
+    """
+    Resolve a model name to its `run` function and the version of the package providing it.
+    Lookup order: `hestia_earth.models.<name>`, then `hestia_earth.orchestrator.models.<name>`,
+    then `<name>` as a fully specified module path (reported with the hestia models `VERSION`).
+    A `ModuleNotFoundError` raised by the last attempt is propagated.
+    """
+    # packaged locations, each paired with the module exposing its VERSION
+    packaged = [
+        (f"hestia_earth.models.{name}", 'hestia_earth.models.version'),
+        (f"hestia_earth.orchestrator.models.{name}", 'hestia_earth.orchestrator.version')
+    ]
+    for module_path, version_path in packaged:
+        try:
+            return {
+                'run': importlib.import_module(module_path).run,
+                'version': importlib.import_module(version_path).VERSION
+            }
+        except ModuleNotFoundError:
+            continue
+    # not found in the packaged locations: treat the name as a fully specified module path
+    return {
+        'run': importlib.import_module(f"{name}").run,
+        'version': VERSION
+    }
+def _run_pre_checks(data: dict):
+    """
+    Run the `<node_type>.pre_checks` model on `data` when it can be imported.
+    Any exception raised while importing or running it is swallowed and `data` is returned unchanged.
+    """
+    node_type = _snakecase(data.get('@type', data.get('type')))
+    try:
+        checks_module = _import_model('.'.join((node_type, 'pre_checks')))
+        run_checks = checks_module.get('run')
+        logger.info('running pre checks for %s', node_type)
+        return run_checks(data)
+    except Exception:
+        return data
+def _run_post_checks(data: dict):
+    """
+    Run the `<node_type>.post_checks` model on `data` when it can be imported.
+    Any exception raised while importing or running it is swallowed and `data` is returned unchanged.
+    """
+    node_type = _snakecase(data.get('@type', data.get('type')))
+    try:
+        checks_module = _import_model('.'.join((node_type, 'post_checks')))
+        run_checks = checks_module.get('run')
+        logger.info('running post checks for %s', node_type)
+        return run_checks(data)
+    except Exception:
+        return data
+def _run_model(data: dict, model: dict, all_models: list):
+    """
+    Import and execute a single model configuration against `data`.
+    Returns a dict with the `data`, the `model` configuration, the `version` of the package that
+    provided the model and the `result` of the run.
+    """
+    module = _import_model(get_required_model_param(model, 'model'))
+    # if no value is provided, use all the models but this one
+    model_value = model.get('value')
+    if not model_value:
+        model_value = _list_except_item(all_models, model)
+    run_fn = module.get('run')
+    return {
+        'data': data,
+        'model': model,
+        'version': module.get('version'),
+        'result': run_fn(model_value, data)
+    }
+def _run(data: dict, model: dict, all_models: list):
+    """Run `model` when its run strategy allows it for `data`, otherwise return `None`."""
+    if not should_run(data, model):
+        return None
+    return _run_model(data, model, all_models)
+def _run_serie(data: dict, models: list, stage: Union[int, List[int]] = None):
+    """
+    Run the stage-filtered models one after the other, merging each result before the next run.
+    A list entry is executed as a parallel group; a dict entry is executed on a deep copy of the
+    data accumulated so far.
+    """
+    current = data
+    for entry in _filter_models_stage(models, stage=stage):
+        if isinstance(entry, list):
+            results = _run_parallel(current, entry, models)
+        else:
+            results = [_run(deepcopy(current), entry, models)]
+        current = merge(current, results)
+    return current
+def _run_parallel(data: dict, model: list, all_models: list):
+    """
+    Run a group of models concurrently in a thread pool, each on its own deep copy of `data`.
+    The pool size comes from `_max_workers` for the node type. Results are gathered after the pool
+    has shut down, in the order yielded by `as_completed`.
+    """
+    node_type = data.get('@type', data.get('type'))
+    with concurrent.futures.ThreadPoolExecutor(max_workers=_max_workers(node_type)) as executor:
+        futures = [executor.submit(_run, deepcopy(data), m, all_models) for m in model]
+    return [done.result() for done in concurrent.futures.as_completed(futures)]
+def run(data: dict, models: list, stage: Union[int, List[int]] = None):
+    """Run the optional pre-checks, then the models (filtered by `stage`), then the optional post-checks."""
+    checked = _run_pre_checks(data)
+    computed = _run_serie(checked, models, stage=stage)
+    return _run_post_checks(computed)

hestia_earth/orchestrator/models/emissions/__init__.py ADDED Viewed

File without changes

hestia_earth/orchestrator/models/emissions/deleted.py ADDED Viewed

@@ -0,0 +1,15 @@
+from hestia_earth.utils.tools import non_empty_list
+from hestia_earth.utils.emission import emissions_in_system_boundary
+def _run_emission(term_ids: list):
+    """
+    Build a function flagging emissions whose term is not in `term_ids`.
+    The returned function gives back a copy of the emission with `deleted: True`, or `None` when the
+    emission term id is part of `term_ids`.
+    """
+    def run(emission: dict):
+        if emission.get('term', {}).get('@id') in term_ids:
+            return None
+        return emission | {'deleted': True}
+    return run
+def run(_models: list, cycle: dict):
+    """
+    Return the emissions of the cycle that are outside of the system boundary, flagged as deleted.
+    Returns an empty list when `emissions_in_system_boundary` gives no ids.
+    """
+    allowed_ids = emissions_in_system_boundary()
+    emissions = cycle.get('emissions', [])
+    if len(allowed_ids) == 0:
+        return []
+    flag_deleted = _run_emission(allowed_ids)
+    return non_empty_list(map(flag_deleted, emissions))

hestia_earth/orchestrator/models/transformations.py ADDED Viewed

@@ -0,0 +1,103 @@
+from copy import deepcopy
+from functools import reduce
+from hestia_earth.schema import CompletenessJSONLD
+from hestia_earth.utils.lookup import download_lookup, get_table_value, column_name
+from hestia_earth.models.transformation.input.utils import replace_input_field
+from hestia_earth.models.utils.transformation import previous_transformation
+from hestia_earth.models.utils.product import find_by_product
+from . import run as run_node, _import_model
+from hestia_earth.orchestrator.utils import new_practice, _filter_by_keys
+def _full_completeness():
+    """Build a Completeness node where every key other than `@type` is set to `True`."""
+    template = CompletenessJSONLD().to_dict()
+    flags = list(template.keys())
+    flags.remove('@type')
+    return {'@type': template['@type'], **{flag: True for flag in flags}}
+def _include_practice(practice: dict):
+    """Tell whether the practice term has a truthy `includeForTransformation` value in its termType lookup."""
+    term = practice.get('term', {})
+    lookup = download_lookup(f"{term.get('termType')}.csv")
+    value = get_table_value(lookup, 'termid', term.get('@id'), column_name('includeForTransformation'))
+    if value is None or value == '' or not value:
+        return False
+    return True
+def _copy_from_cycle(cycle: dict, transformation: dict, keys: list):
+    """
+    Deep-copy the transformation and fill in the given keys.
+    For each key the first truthy value wins among: the transformation-named variant of the key
+    (`cycle` replaced by `transformation`), the key on the transformation, the key on the cycle.
+    When none is truthy the cycle value is used; a `None` value is never written.
+    """
+    result = deepcopy(transformation)
+    for key in keys:
+        candidates = (
+            transformation.get(key.replace('cycle', 'transformation')),
+            transformation.get(key),
+            cycle.get(key)
+        )
+        chosen = next((c for c in candidates if c), candidates[-1])
+        if chosen is not None:
+            result[key] = chosen
+    return result
+def _convert_transformation(cycle: dict, transformation: dict):
+    """
+    Turn a transformation into a node that models can run on.
+    Fills selected fields in from the cycle, sets `completeness` (from the cycle, or fully complete
+    when the cycle has none) and builds `practices` from the transformation term, the
+    transformation practices and the cycle practices flagged for inclusion.
+    """
+    inherited_keys = ['functionalUnit', 'site', 'otherSites', 'cycleDuration', 'startDate', 'endDate']
+    data = _copy_from_cycle(cycle, transformation, inherited_keys)
+    data['completeness'] = cycle.get('completeness', _full_completeness())
+    # add `term` as a Practice
+    term_practice = [new_practice(transformation.get('term'))]
+    own_practices = transformation.get('practices', [])
+    # some practices need to be copied over
+    cycle_practices = [p for p in cycle.get('practices', []) if _include_practice(p)]
+    data['practices'] = term_practice + own_practices + cycle_practices
+    return data
+def _run_models(cycle: dict, transformation: dict, models: list):
+    """Run the models on the converted transformation and keep only the transformation-level keys."""
+    kept_keys = ['transformationId', 'term', 'inputs', 'products', 'emissions']
+    node = _convert_transformation(cycle, transformation)
+    return _filter_by_keys(run_node(node, models), kept_keys)
+def _apply_transformation_share(previous: dict, current: dict):
+    """
+    Return a copy of `current` whose inputs had `value`, `min`, `max` and `sd` recomputed.
+    Each field is recomputed by `replace_input_field` from the matching product of `previous` and
+    the `transformedShare` of `current` (100 when not set).
+    """
+    share = current.get('transformedShare', 100)
+    def with_share(node: dict):
+        product = find_by_product(previous, node)
+        updated = dict(node)
+        for field in ('value', 'min', 'max', 'sd'):
+            updated.update(replace_input_field(previous, None, node, product, share, field))
+        return updated
+    return {**current, 'inputs': [with_share(i) for i in current.get('inputs', [])]}
+def _add_excreta_inputs(previous: dict, current: dict):
+    """
+    Run the `transformation.input.excreta` model on a Cycle built from `previous` with `current` as
+    its only transformation, then return `current`.
+    """
+    excreta_model = _import_model('transformation.input.excreta').get('run')
+    wrapper_cycle = dict(previous)
+    wrapper_cycle['@type'] = 'Cycle'
+    wrapper_cycle['transformations'] = [current]
+    # model will add the inputs directly in the transformation
+    excreta_model(wrapper_cycle)
+    return current
+def _run_transformation(cycle: dict, models: list):
+    """
+    Build the reducer that processes one transformation and appends it to those already processed.
+    """
+    def run(transformations: list, transformation: dict):
+        previous = previous_transformation(cycle, transformations, transformation)
+        shared = _apply_transformation_share(previous, transformation)
+        # add missing excreta Input when relevant and apply the value share as well
+        with_excreta = _add_excreta_inputs(previous, shared)
+        reshared = _apply_transformation_share(previous, with_excreta)
+        computed = _run_models(cycle, reshared, models)
+        return transformations + [computed]
+    return run
+def run(models: list, cycle: dict):
+    """Process the transformations of the cycle in order and return the list of processed transformations."""
+    process = _run_transformation(cycle, models)
+    processed = []
+    for transformation in cycle.get('transformations', []):
+        processed = process(processed, transformation)
+    return processed

hestia_earth/orchestrator/strategies/__init__.py ADDED Viewed

File without changes

hestia_earth/orchestrator/strategies/merge/__init__.py ADDED Viewed

@@ -0,0 +1,42 @@
+from functools import reduce
+import pydash
+from hestia_earth.orchestrator.utils import _non_empty, _non_empty_list, update_node_version
+from . import merge_append
+from . import merge_default
+from . import merge_list
+from . import merge_node
+def _non_empty_results(results: list):
+    """Keep the results that are non-empty and whose `result` value is non-empty as well."""
+    return [r for r in results if _non_empty(r) and _non_empty_list(r.get('result'))]
+def _merge_version(data: dict):
+    """Return the `version` of a model result."""
+    # set as a function to patch it for testing
+    return data.get('version')
+# merge function for each supported `mergeStrategy` value of a model configuration
+_STRATEGIES = {
+    'list': merge_list.merge,
+    'append': merge_append.merge,
+    'node': merge_node.merge,
+    'default': merge_default.merge
+}
+def _merge_result(data: dict, result: dict):
+    """
+    Merge one model result into `data` under the `key` of the model configuration.
+    The merge function is chosen by `mergeStrategy` (`default` when not set; an unknown name raises
+    `KeyError`). For the `list` strategy a non-list result is wrapped in a list first.
+    """
+    model = result.get('model')
+    key = model.get('key')
+    version = _merge_version(result)
+    strategy_name = model.get('mergeStrategy', 'default')
+    values = result.get('result')
+    if strategy_name == 'list' and not isinstance(values, list):
+        values = [values]
+    strategy = _STRATEGIES[strategy_name]
+    new_value = strategy(
+        data.get(key), values, version, model, model.get('mergeArgs', {}), data.get('type', data.get('@type'))
+    )
+    return update_node_version(version, pydash.objects.merge({}, data, {key: new_value}), data)
+def merge(data: dict, results: list):
+    """Merge every non-empty model result into `data`, one after the other, and return the outcome."""
+    merged = data
+    for result in _non_empty_results(results):
+        merged = _merge_result(merged, result)
+    return merged

hestia_earth/orchestrator/strategies/merge/merge_append.py ADDED Viewed

@@ -0,0 +1,29 @@
+from functools import reduce
+from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
+from hestia_earth.orchestrator.log import logger
+def _merge_node(dest: dict, version: str):
+    """Wrap a single node, stamped with `version`, in a list so that it can be appended."""
+    term = dest.get('term', {})
+    logger.debug('append %s with value: %s', term.get('@id', dest.get('@id')), dest.get('value'))
+    return [update_node_version(version, dest)]
+# how to turn a value into the list that gets appended, keyed by the value's type name:
+# a dict is versioned and wrapped in a list, a list is used as it is
+_MERGE_BY_TYPE = {
+    'dict': _merge_node,
+    'list': lambda dest, *args: dest
+}
+def _merge_el(version: str):
+    """
+    Build the reducer appending one value to the source list.
+    The handler is picked from `_MERGE_BY_TYPE` by the value's type name; any other type is
+    appended as a single element.
+    """
+    def merge(source: list, dest: dict):
+        def as_single_item(*args): return [dest]
+        handler = _MERGE_BY_TYPE.get(type(dest).__name__, as_single_item)
+        return source + handler(dest, version)
+    return merge
+def merge(source: list, dest, version: str, *args):
+    """
+    Append the new value(s) in `dest` to `source` and return the resulting list.
+    `source` defaults to an empty list when `None`; `dest` may be a single value or a list of values.
+    """
+    merged = [] if source is None else source
+    candidates = dest if isinstance(dest, list) else [dest]
+    append = _merge_el(version)
+    for node in _non_empty_list(candidates):
+        merged = append(merged, node)
+    return merged

hestia_earth/orchestrator/strategies/merge/merge_default.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ def merge(_source, dest, *args): return dest  # default strategy: the new value replaces the existing one

hestia_earth/orchestrator/strategies/merge/merge_list.py ADDED Viewed

@@ -0,0 +1,103 @@
+import pydash
+from hestia_earth.schema import UNIQUENESS_FIELDS
+from hestia_earth.orchestrator.utils import _non_empty_list, update_node_version
+from .merge_node import merge as merge_node
+_METHOD_MODEL_KEY = 'methodModel.@id'
+def _matching_properties(model: dict, node_type: str):
+    """Return the uniqueness fields declared in the schema for `node_type` and the model `key` (or `[]`)."""
+    fields_by_key = UNIQUENESS_FIELDS.get(node_type, {})
+    return fields_by_key.get(model.get('key'), [])
+def _has_property(value: dict, key: str):
+    """
+    Tell whether the dotted `key` resolves to a non-`None` value on `value`.
+    When the first path segment holds a list, the rest of the path must resolve on every element.
+    """
+    head, *tail = key.split('.')
+    first = pydash.objects.get(value, head)
+    if tail and isinstance(first, list):
+        nested_key = '.'.join(tail)
+        found = [pydash.objects.get(item, nested_key) for item in first]
+    else:
+        found = [pydash.objects.get(value, key)]
+    return all(v is not None for v in found)
+def _values_have_property(values: list, key: str):
+    """Tell whether at least one of the values has the property `key`."""
+    return any(_has_property(value, key) for value in values)
+def _match_list_el(source: list, dest: list, key: str):
+    """Compare, regardless of order, the non-empty `key` values found in the two lists."""
+    def sorted_values(items: list):
+        return sorted(_non_empty_list([pydash.objects.get(item, key) for item in items]))
+    return sorted_values(source) == sorted_values(dest)
+def _match_el(source: dict, dest: dict, keys: list):
+    """
+    Tell whether two elements are the same according to the unique properties in `keys`.
+    Both elements must define the same subset of `keys`, and each defined property must be equal.
+    A property going through a list (on either element) is compared with `_match_list_el`.
+    """
+    def match(key: str):
+        head, *tail = key.split('.')
+        goes_through_list = bool(tail) and (
+            isinstance(pydash.objects.get(source, head), list) or
+            isinstance(pydash.objects.get(dest, head), list)
+        )
+        if goes_through_list:
+            return _match_list_el(
+                pydash.objects.get(source, head, []),
+                pydash.objects.get(dest, head, []),
+                '.'.join(tail)
+            )
+        return pydash.objects.get(source, key) == pydash.objects.get(dest, key)
+    source_properties = [p for p in keys if _has_property(source, p)]
+    dest_properties = [p for p in keys if _has_property(dest, p)]
+    if source_properties != dest_properties:
+        return False
+    return all(map(match, source_properties))
+def _handle_local_property(values: list, properties: list, local_id: str):
+    """
+    Adjust `properties` (in place) for a local id property such as "impactAssessment.id".
+    Nothing changes unless `local_id` is listed. When it is: it is removed if no value uses it, and
+    its `.@id` counterpart is added if at least one value uses that one. Returns `properties`.
+    """
+    if local_id not in properties:
+        return properties
+    # Handle "impactAssessment.@id" if present in the data
+    existing_id = local_id.replace('.id', '.@id')
+    # remove if not used
+    if not _values_have_property(values, local_id):
+        properties.remove(local_id)
+    # add if used
+    if _values_have_property(values, existing_id):
+        properties.append(existing_id)
+    return properties
+def _find_match_el_index(values: list, el: dict, same_methodModel: bool, model: dict, node_type: str):
+    """
+    Return the index of the first element of `values` matching `el` on the unique properties.
+    Steps:
+    1. Take the unique properties of the schema, forcing `methodModel.@id` in or out depending on
+       `same_methodModel`, and handling `impactAssessment.id` / `impactAssessment.@id`
+    2. Return `None` when no property is left to compare
+    3. Otherwise return the index of the first matching element, or `None` when there is none
+    """
+    properties = _matching_properties(model, node_type)
+    if same_methodModel:
+        properties = list(set(properties + [_METHOD_MODEL_KEY]))
+    else:
+        properties = [p for p in properties if p != _METHOD_MODEL_KEY]
+    properties = _handle_local_property(values, properties, 'impactAssessment.id')
+    if not properties:
+        return None
+    for index, candidate in enumerate(values):
+        if _match_el(candidate, el, properties):
+            return index
+    return None
+def merge(source: list, merge_with: list, version: str, model: dict = {}, merge_args: dict = {}, node_type: str = ''):
+    """
+    Merge the new elements of `merge_with` into the `source` list and return it.
+    An element without a match in `source` is appended, stamped with `version`. An element with a
+    match is merged into it with the node strategy, unless `skipSameTerm` is set.
+    `source` is modified in place when it is not `None`.
+    """
+    source = [] if source is None else source
+    # only merge node if it has the same `methodModel`
+    same_methodModel = merge_args.get('sameMethodModel', False)
+    # leave an existing matching element untouched instead of merging into it
+    skip_same_term = merge_args.get('skipSameTerm', False)
+    for el in _non_empty_list(merge_with):
+        source_index = _find_match_el_index(source, el, same_methodModel, model, node_type)
+        if source_index is None:
+            source.append(update_node_version(version, el))
+            continue
+        if skip_same_term:
+            continue
+        source[source_index] = merge_node(source[source_index], el, version, model, merge_args)
+    return source

hestia_earth/orchestrator/strategies/merge/merge_node.py ADDED Viewed

@@ -0,0 +1,59 @@
+import pydash
+from hestia_earth.schema import EmissionMethodTier
+from hestia_earth.orchestrator.log import logger, logShouldMerge
+from hestia_earth.orchestrator.utils import update_node_version, _average
+# method tiers in ranking order: the position in this list is what gets compared when deciding
+# whether a new value may replace an existing one
+_METHOD_TIER_ORDER = [
+    EmissionMethodTier.NOT_RELEVANT.value,
+    EmissionMethodTier.TIER_1.value,
+    EmissionMethodTier.TIER_2.value,
+    EmissionMethodTier.TIER_3.value,
+    EmissionMethodTier.MEASURED.value,
+    EmissionMethodTier.BACKGROUND.value
+]
+def _has_threshold_diff(source: dict, dest: dict, key: str, threshold: float):
+    """
+    Tell whether the `key` value of `dest` differs enough from the one of `source` to be merged.
+    Parameters
+    ----------
+    source : dict
+        The existing node.
+    dest : dict
+        The new node.
+    key : str
+        The field to compare; both values go through `_average` (presumably a mean of the list - verify).
+    threshold : float
+        Minimum relative difference, as a ratio of the existing value, above which the merge happens.
+    Returns
+    -------
+    bool
+        `True` when the existing value is missing or when the relative difference exceeds `threshold`.
+    """
+    term_id = dest.get('term', {}).get('@id', dest.get('@id'))
+    source_value = _average(source.get(key), None)
+    dest_value = _average(dest.get(key), None)
+    if source_value is None or dest_value is None:
+        delta = None
+    else:
+        # divide by the magnitude of the existing value: with a negative value the ratio used to be
+        # negative and could never exceed the threshold; a zero value falls back to a divisor of 1
+        delta = abs(source_value - dest_value) / (1 if source_value == 0 else abs(source_value))
+    should_merge = source_value is None or (delta is not None and delta > threshold)
+    logger.debug('merge %s for %s with threshold=%s, delta=%s: %s', key, term_id, threshold, delta, should_merge)
+    return should_merge
+def _should_merge_threshold(source: dict, dest: dict, args: dict):
+    """
+    Apply the `replaceThreshold` merge argument, given as `[key, threshold]`.
+    Without a key the merge is always allowed.
+    """
+    key, threshold = args.get('replaceThreshold', [None, 0])
+    if key is None:
+        return True
+    return _has_threshold_diff(source, dest, key, threshold)
+def _should_merge_lower_tier(source: dict, dest: dict, args: dict):
+    """
+    Apply the `replaceLowerTier` merge argument.
+    The merge is allowed when `replaceLowerTier` is truthy, or when the tier of the new node ranks
+    at least as high as the tier of the existing one in `_METHOD_TIER_ORDER`. A missing tier counts
+    as the first entry for the existing node and as the last entry for the new node.
+    """
+    lowest, highest = _METHOD_TIER_ORDER[0], _METHOD_TIER_ORDER[-1]
+    source_rank = _METHOD_TIER_ORDER.index(source.get('methodTier', lowest))
+    dest_rank = _METHOD_TIER_ORDER.index(dest.get('methodTier', highest))
+    term_id = dest.get('term', {}).get('@id', dest.get('@id'))
+    should_merge = args.get('replaceLowerTier', False) or dest_rank >= source_rank
+    logger.debug('merge for %s with original tier=%s, new tier=%s: %s',
+                 term_id, source.get('methodTier'), dest.get('methodTier'), should_merge)
+    return should_merge
+# checks run before merging into an existing node; every one of them must pass for the merge to happen
+_MERGE_FROM_ARGS = {
+    'replaceThreshold': _should_merge_threshold,
+    'replaceLowerTier': _should_merge_lower_tier
+}
+def merge(source: dict, dest: dict, version: str, model: dict = {}, merge_args: dict = {}, *args):
+    """
+    Merge the new node `dest` into the existing node `source`.
+    When `source` exists, every check of `_MERGE_FROM_ARGS` must pass. The decision is logged, then
+    either the merged node stamped with `version` or the untouched `source` is returned.
+    """
+    checks = {}
+    if source is not None:
+        for name, check in _MERGE_FROM_ARGS.items():
+            checks[name] = check(source, dest, merge_args)
+    term_id = dest.get('term', {}).get('@id', dest.get('@id'))
+    should_merge = all(checks.values())
+    logShouldMerge(source, model.get('model'), term_id, should_merge, key=model.get('key'), value=term_id, **checks)
+    if not should_merge:
+        return source
+    return update_node_version(version, pydash.objects.merge({}, source, dest), source)

hestia_earth/orchestrator/strategies/run/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+import importlib
+from hestia_earth.orchestrator.utils import get_required_model_param
+def should_run(data: dict, model: dict):
+    """
+    Delegate to the `should_run` function of the run strategy named by the required `runStrategy`
+    parameter of the model configuration.
+    """
+    strategy = get_required_model_param(model, 'runStrategy')
+    strategy_module = importlib.import_module(f"hestia_earth.orchestrator.strategies.run.{strategy}")
+    return strategy_module.should_run(data, model)

hestia-earth-models 0.65.6__py3-none-any.whl → 0.65.7__py3-none-any.whl

hestia-earth-models 0.65.6__py3-none-any.whl → 0.65.7__py3-none-any.whl