hestia-earth-utils 0.16.9__py3-none-any.whl → 0.16.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hestia_earth/utils/api.py +78 -36
- hestia_earth/utils/blank_node.py +101 -60
- hestia_earth/utils/calculation_status.py +45 -35
- hestia_earth/utils/cycle.py +7 -7
- hestia_earth/utils/date.py +7 -2
- hestia_earth/utils/descriptive_stats.py +10 -6
- hestia_earth/utils/emission.py +26 -15
- hestia_earth/utils/lookup.py +62 -28
- hestia_earth/utils/lookup_utils.py +89 -63
- hestia_earth/utils/model.py +45 -40
- hestia_earth/utils/pipeline.py +179 -90
- hestia_earth/utils/pivot/_shared.py +16 -12
- hestia_earth/utils/pivot/pivot_csv.py +35 -18
- hestia_earth/utils/pivot/pivot_json.py +34 -18
- hestia_earth/utils/request.py +17 -6
- hestia_earth/utils/stats.py +89 -68
- hestia_earth/utils/storage/_azure_client.py +17 -6
- hestia_earth/utils/storage/_local_client.py +8 -3
- hestia_earth/utils/storage/_s3_client.py +27 -22
- hestia_earth/utils/storage/_sns_client.py +7 -2
- hestia_earth/utils/term.py +5 -5
- hestia_earth/utils/tools.py +50 -21
- hestia_earth/utils/version.py +1 -1
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/METADATA +1 -1
- hestia_earth_utils-0.16.10.dist-info/RECORD +33 -0
- hestia_earth_utils-0.16.9.dist-info/RECORD +0 -33
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-format-upload +0 -0
- {hestia_earth_utils-0.16.9.data → hestia_earth_utils-0.16.10.data}/scripts/hestia-pivot-csv +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/WHEEL +0 -0
- {hestia_earth_utils-0.16.9.dist-info → hestia_earth_utils-0.16.10.dist-info}/top_level.txt +0 -0
hestia_earth/utils/pipeline.py
CHANGED
@@ -5,14 +5,19 @@ import numpy as np
 from .tools import current_time_ms, non_empty_list, flatten
 from .api import find_related
 from .storage._s3_client import (
-    _load_from_bucket,
+    _load_from_bucket,
+    _upload_to_bucket,
+    _last_modified,
+    _read_metadata,
+    _update_metadata,
+    _exists_in_bucket,
 )
 from .storage._sns_client import _get_sns_client

-PROGRESS_EXT =
-CALC_FOLDER =
-METADATA_STAGE_KEY =
-METADATA_PROGRESS_KEY =
+PROGRESS_EXT = ".progress"
+CALC_FOLDER = "recalculated"
+METADATA_STAGE_KEY = "stage"
+METADATA_PROGRESS_KEY = "calculating"


 # fix error "Object of type int64 is not JSON serializable"

@@ -27,10 +32,12 @@ class NpEncoder(json.JSONEncoder):
         return super(NpEncoder, self).default(obj)


-def to_string(data: dict, indent: int = None):
+def to_string(data: dict, indent: int = None):
+    return json.dumps(data, indent=indent, ensure_ascii=False, cls=NpEncoder)


-def to_bytes(data: dict):
+def to_bytes(data: dict):
+    return to_string(data).encode("utf8")


 def upload_json(bucket_name: str, file_key: str, body: dict):
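The `NpEncoder`/`to_string` pair exists because numpy scalars raise "Object of type int64 is not JSON serializable" with the standard encoder. A minimal, self-contained sketch of the same pattern; the encoder body below is written out for illustration and is not copied from the package:

import json

import numpy as np


class NpEncoder(json.JSONEncoder):
    # Convert numpy scalar types to plain Python types before serializing.
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)


data = {"value": np.int64(42), "mean": np.float64(1.5)}
# Without cls=NpEncoder the int64 value makes json.dumps raise a TypeError.
print(json.dumps(data, ensure_ascii=False, cls=NpEncoder))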
@@ -38,15 +45,21 @@ def upload_json(bucket_name: str, file_key: str, body: dict):
         bucket=bucket_name,
         key=file_key,
         body=to_bytes(body),
-        content_type=
+        content_type="application/json",
     )


 def _to_file_progress(filepath: str):
-    return
+    return (
+        filepath.replace(".csv", PROGRESS_EXT)
+        .replace(".json", PROGRESS_EXT)
+        .replace(".hestia", PROGRESS_EXT)
+    )


-def handle_result(
+def handle_result(
+    bucket_name: str, file_key: str, step: str, start: int, content: dict
+):
     filepath = _to_file_progress(file_key)

     # try to read existing progress to update the time per step
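Each uploaded file gets a sibling progress document: `_to_file_progress` swaps the data extension for `.progress`, and `handle_result` merges the elapsed time of the current step into whatever timings are already stored. A minimal sketch of that merge, with the S3 upload left out and an in-memory dict standing in for the existing progress file (the helper names below are reused from the diff, the rest is illustrative):

PROGRESS_EXT = ".progress"


def to_file_progress(filepath: str) -> str:
    # "recalculated/Cycle/abc.json" -> "recalculated/Cycle/abc.progress"
    return (
        filepath.replace(".csv", PROGRESS_EXT)
        .replace(".json", PROGRESS_EXT)
        .replace(".hestia", PROGRESS_EXT)
    )


def merge_step(existing: dict, step: str, elapsed_ms: int, content: dict) -> dict:
    # Keep the previous steps' timings and add (or overwrite) the current step.
    times = existing.get("time", {}) if isinstance(existing.get("time", {}), dict) else {}
    return {**existing, "step": step, "time": {**times, step: elapsed_ms}, **content}


progress = {"step": "index", "time": {"index": 120}}
progress = merge_step(progress, "recalculate", 950, {"success": True})
print(to_file_progress("recalculated/Cycle/abc.json"))
print(progress)  # {'step': 'recalculate', 'time': {'index': 120, 'recalculate': 950}, 'success': True}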
@@ -55,89 +68,122 @@ def handle_result(bucket_name: str, file_key: str, step: str, start: int, conten
     except Exception:
         data = {}

-    return upload_json(
-
-
-
-        **
-            step:
+    return upload_json(
+        bucket_name,
+        filepath,
+        {
+            **data,
+            "step": step,
+            "time": {
+                **(
+                    data.get("time", {})
+                    if isinstance(data.get("time", {}), dict)
+                    else {}
+                ),
+                step: current_time_ms() - start,
+            },
+            **content,
         },
-
-    })
+    )


 def handle_error(
-    bucket_name: str,
-
-
+    bucket_name: str,
+    file_key: str,
+    step: str,
+    start: int,
+    err: str = "",
+    stack: str = "",
+    errors=[],
+    warnings=[],
+    extras: dict = {},
 ):
-    return handle_result(
-
-
-
-
-
-
+    return handle_result(
+        bucket_name,
+        file_key,
+        step,
+        start,
+        extras
+        | {
+            "success": False,
+            "error": {
+                "message": err,
+                "stack": stack,
+                "errors": errors,
+                "warnings": warnings,
+            },
+            "warning": {"warnings": warnings},
         },
-
-        'warnings': warnings
-        }
-    })
+    )


-def handle_success(
-
+def handle_success(
+    bucket_name: str, file_key: str, step: str, start: int, extras: dict = {}
+):
+    return handle_result(bucket_name, file_key, step, start, extras | {"success": True})


-def publish_result(
+def publish_result(
+    topic_arn: str,
+    bucket_name: str,
+    file_key: str,
+    filepath: str,
+    step: str,
+    success: bool,
+):
     return _get_sns_client().publish(
         TopicArn=topic_arn,
-        Message=to_string(
-
-
-            'filepath': filepath
-        }),
+        Message=to_string(
+            {"bucket": bucket_name, "key": file_key, "filepath": filepath}
+        ),
         MessageAttributes={
-
-
-
+            "functionName": {
+                "DataType": "String",
+                "StringValue": step + ("Done" if success else "Error"),
             }
-        }
+        },
     )


 def _parse_event_s3(event: dict):
-    return {
+    return {
+        "bucket": event["s3"]["bucket"]["name"],
+        "key": event["s3"]["object"]["key"],
+    }


 def _parse_event_SNS(event: dict):
-    event = event.get(
-    data = json.loads(event.get(
-    attributes: dict = event.get(
-    data[
+    event = event.get("Sns", {})
+    data = json.loads(event.get("Message", "{}"))
+    attributes: dict = event.get("MessageAttributes", {})
+    data["attributes"] = {key: value.get("Value") for key, value in attributes.items()}
     return data


 def _parse_event_SQS(event: dict):
-    condition = event.get(
-    return
+    condition = event.get("requestContext", {}).get("condition")
+    return (
+        _get_data_from_event(event.get("requestPayload", {}))
+        if condition != "RetriesExhausted"
+        else None
+    )


 def _get_data_from_event(event):  # noqa: C901
     if isinstance(event, dict):
-        if
+        if "s3" in event:
             return _parse_event_s3(event)
         # invoked when running asynchronously
-        if
+        if "Sns" in event:
             return _parse_event_SNS(event)
         # invoked through http event
-        if
-            return _get_data_from_event(json.loads(event.get(
+        if "body" in event:
+            return _get_data_from_event(json.loads(event.get("body", "{}")))
         # invoked through s3 put object
-        if
-            return flatten(map(_get_data_from_event, event.get(
+        if "Records" in event:
+            return flatten(map(_get_data_from_event, event.get("Records", [])))
         # invoked when calculation timedout or failed
-        if
+        if "requestPayload" in event:
             return _parse_event_SQS(event)
         return event
     if isinstance(event, str):
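`_get_data_from_event` normalizes the different Lambda trigger shapes (direct S3 events, SNS envelopes, HTTP bodies, `Records` batches, SQS retry payloads) into plain `{bucket, key}` dicts. A self-contained sketch of the dispatch on two sample events; the payloads are hypothetical, the flattening of `Records` is simplified to a list comprehension, and the HTTP/SQS branches are omitted:

import json


def parse_s3(event: dict) -> dict:
    return {"bucket": event["s3"]["bucket"]["name"], "key": event["s3"]["object"]["key"]}


def parse_sns(event: dict) -> dict:
    sns = event.get("Sns", {})
    data = json.loads(sns.get("Message", "{}"))
    attributes = sns.get("MessageAttributes", {})
    data["attributes"] = {k: v.get("Value") for k, v in attributes.items()}
    return data


def get_data_from_event(event):
    if isinstance(event, dict):
        if "s3" in event:
            return parse_s3(event)
        if "Sns" in event:
            return parse_sns(event)
        if "Records" in event:
            return [get_data_from_event(record) for record in event.get("Records", [])]
    return event


s3_event = {"Records": [{"s3": {"bucket": {"name": "my-bucket"}, "object": {"key": "Cycle/abc.jsonld"}}}]}
sns_event = {"Sns": {"Message": json.dumps({"bucket": "my-bucket", "key": "Cycle/abc.jsonld"}),
                     "MessageAttributes": {"functionName": {"Value": "recalculateDone"}}}}
print(get_data_from_event(s3_event))
print(get_data_from_event(sns_event))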
@@ -149,34 +195,51 @@ def parse_event(event: dict):
     return non_empty_list(flatten(data) if isinstance(data, list) else [data])


-def _node_type(node: dict):
+def _node_type(node: dict):
+    return node.get("@type", node.get("type"))


-def _node_id(node: dict):
+def _node_id(node: dict):
+    return node.get("@id", node.get("id"))


-def _node_path(node: dict, folder: str =
+def _node_path(node: dict, folder: str = ""):
+    return join(folder, _node_type(node), f"{_node_id(node)}.jsonld")


-def _load_node(bucket: str, file_key: str):
+def _load_node(bucket: str, file_key: str):
+    return json.loads(_load_from_bucket(bucket, file_key))


-def _cache_path(node: dict):
+def _cache_path(node: dict):
+    return join(_node_type(node), f"{_node_id(node)}.cache")


-def _has_cache(bucket: str, node: dict):
+def _has_cache(bucket: str, node: dict):
+    return _exists_in_bucket(bucket, _cache_path(node))


-def is_calculating(bucket: str, node: dict, folder: str =
-    return
+def is_calculating(bucket: str, node: dict, folder: str = ""):
+    return (
+        _read_metadata(bucket, _node_path(node, folder)).get(
+            METADATA_PROGRESS_KEY, "false"
+        )
+        == "true"
+    )


-def set_calculating(bucket: str, node: dict, in_progress: bool, folder: str =
-    return _update_metadata(
+def set_calculating(bucket: str, node: dict, in_progress: bool, folder: str = ""):
+    return _update_metadata(
+        bucket,
+        _node_path(node, folder),
+        {METADATA_PROGRESS_KEY: str(in_progress).lower()},
+    )


 def get_stage(bucket: str, node: dict, folder: str = CALC_FOLDER):
-    stage = _read_metadata(bucket, _node_path(node, folder=CALC_FOLDER)).get(
+    stage = _read_metadata(bucket, _node_path(node, folder=CALC_FOLDER)).get(
+        METADATA_STAGE_KEY
+    )
     return int(stage) if stage else stage
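The in-progress flag and the recalculation stage live in S3 object metadata rather than in the JSON body, so they can be flipped without rewriting the node. The diff does not show how `_read_metadata`/`_update_metadata` are implemented; the snippet below is only one plausible way to do the same round-trip with boto3 directly, with placeholder bucket and key names:

import boto3

METADATA_PROGRESS_KEY = "calculating"

s3 = boto3.client("s3")
bucket, key = "my-hestia-bucket", "Cycle/abc.jsonld"  # placeholders

# Read the flag: user metadata comes back lower-cased from head_object.
metadata = s3.head_object(Bucket=bucket, Key=key).get("Metadata", {})
is_calculating = metadata.get(METADATA_PROGRESS_KEY, "false") == "true"

# Flip the flag without rewriting the body: copy the object onto itself,
# replacing its metadata.
s3.copy_object(
    Bucket=bucket,
    Key=key,
    CopySource={"Bucket": bucket, "Key": key},
    Metadata={**metadata, METADATA_PROGRESS_KEY: "true"},
    MetadataDirective="REPLACE",
)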
@@ -196,29 +259,43 @@ def load_cache(bucket: str, node: dict):
     dict
         The cached data.
     """
-    cache_path = join(node[
+    cache_path = join(node["@type"], f"{node['@id']}.cache")
     try:
         return json.loads(_load_from_bucket(bucket, cache_path))
     except Exception:
-        print(
+        print("No cache found for", cache_path)
         return {}


-def _filter_by_type(nodes: list, type: str):
+def _filter_by_type(nodes: list, type: str):
+    return [n for n in nodes if n.get("@type", n.get("type")) == type]


-def _find_related_nodes(
-
-
-
+def _find_related_nodes(
+    from_type: str, from_id: str, related_type: str, related_key: str
+):
+    should_find_related = related_key == "related"
+    print("Find related nodes from API", from_type, from_id, related_key, related_type)
+    return (
+        find_related(from_type, from_id, related_type, limit=10000)
+        if should_find_related
+        else []
+    )


-def _get_cached_nodes(
+def _get_cached_nodes(
+    cache: dict, related_key: str, from_type: str, from_id: str, to_type: str
+):
     # if key is in cache, use nodes in cache, otherwise use API
     if related_key in cache:
         nodes = _filter_by_type(cache.get(related_key, []), to_type)
-        print(
-        return list(
+        print("Using cached data to", related_key, to_type, nodes)
+        return list(
+            map(
+                lambda node: {"@type": to_type, "@id": node.get("@id", node.get("id"))},
+                nodes,
+            )
+        )
     else:
         return _find_related_nodes(from_type, from_id, to_type, related_key)
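Related nodes come either from the node's cached `related` entries or, as a fallback, from the HESTIA `find_related` API. A minimal local sketch of the cache branch only; the sample cache contents are made up and the API branch is omitted because it needs network access:

def filter_by_type(nodes: list, node_type: str) -> list:
    return [n for n in nodes if n.get("@type", n.get("type")) == node_type]


def cached_related_nodes(cache: dict, related_key: str, to_type: str) -> list:
    # Cache branch only: the real helper falls back to the find_related API
    # when related_key is missing from the cache.
    nodes = filter_by_type(cache.get(related_key, []), to_type)
    return [{"@type": to_type, "@id": n.get("@id", n.get("id"))} for n in nodes]


cache = {
    "related": [
        {"@type": "ImpactAssessment", "@id": "ia-1"},
        {"type": "Cycle", "id": "cycle-1"},
    ]
}
print(cached_related_nodes(cache, "related", "Cycle"))  # [{'@type': 'Cycle', '@id': 'cycle-1'}]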
@@ -244,15 +321,22 @@ def get_related_nodes(node: dict, related_key: str, related_type: str, cache: di
     List[dict]
         The related nodes.
     """
-    from_type = node.get(
-    from_id = node.get(
+    from_type = node.get("@type", node.get("type"))
+    from_id = node.get("@id", node.get("id"))

-    related_nodes =
+    related_nodes = (
+        _get_cached_nodes(cache or {}, related_key, from_type, from_id, related_type)
+        or []
+    )

-    return list(
+    return list(
+        {f"{node['@type']}/{node['@id']}": node for node in related_nodes}.values()
+    )


-def get_related_nodes_data(
+def get_related_nodes_data(
+    bucket_name: str, node: dict, related_key: str, related_type: str, cache: dict
+):
     """
     Given a node, return all related nodes with extra data.

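`get_related_nodes` deduplicates its results by building a dict keyed on `"{@type}/{@id}"` and keeping the values, so each node appears once. The same trick in isolation:

related_nodes = [
    {"@type": "Cycle", "@id": "abc"},
    {"@type": "Cycle", "@id": "def"},
    {"@type": "Cycle", "@id": "abc"},  # duplicate, collapsed below
]

unique = list({f"{node['@type']}/{node['@id']}": node for node in related_nodes}.values())
print(unique)  # [{'@type': 'Cycle', '@id': 'abc'}, {'@type': 'Cycle', '@id': 'def'}]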
@@ -275,14 +359,19 @@ def get_related_nodes_data(bucket_name: str, node: dict, related_key: str, relat
     List[dict]
         The related nodes with extra data: `indexed_at`, `recalculated_at` and `recalculated_stage`.
     """
-    related_nodes = get_related_nodes(
+    related_nodes = get_related_nodes(
+        node=node, related_key=related_key, related_type=related_type, cache=cache
+    )

     return [
-        node
-
-
-
-
+        node
+        | {
+            "indexed_at": _last_modified(bucket=bucket_name, key=_node_path(node)),
+            "recalculated_at": _last_modified(
+                bucket=bucket_name, key=_node_path(node, folder=CALC_FOLDER)
+            ),
+            "recalculated_stage": get_stage(bucket_name, node),
+            "is_calculating": is_calculating(bucket_name, node),
         }
        for node in related_nodes
     ]
hestia_earth/utils/pivot/_shared.py
CHANGED

@@ -5,10 +5,12 @@ from hestia_earth.schema import EmissionMethodTier

 EXCLUDE_FIELDS = ["@type", "type", "@context"]
 EXCLUDE_PRIVATE_FIELDS = [
-    "added",
-    "
+    "added",
+    "addedVersion",
+    "updated",
+    "updatedVersion",
     "aggregatedVersion",
-    "_cache"
+    "_cache",
 ]

@@ -17,11 +19,11 @@ def _with_csv_formatting(dct):
     Use as object_hook when parsing a JSON node: json.loads(node, object_hook=_with_csv_formatting).
     Ensures parsed JSON has field values formatted according to hestia csv conventions.
     """
-    if
-        dct[
+    if "boundary" in dct:
+        dct["boundary"] = json.dumps(dct["boundary"])
     for key, value in dct.items():
         if _is_scalar_list(value):
-            dct[key] =
+            dct[key] = ";".join([str(el) for el in value])
     return dct

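The object_hook runs on every JSON object as it is parsed, so scalar lists are already `;`-joined by the time a node reaches the pivot code. A self-contained sketch of the same hook, simplified: the real one also serializes `boundary` geometries and checks scalar-ness through `_is_scalar_list`:

import json


def with_csv_formatting(dct: dict) -> dict:
    # Join lists of plain scalars into a single ";"-separated cell value.
    for key, value in dct.items():
        if isinstance(value, list) and all(not isinstance(el, (dict, list)) for el in value):
            dct[key] = ";".join(str(el) for el in value)
    return dct


raw = '{"term": {"@id": "sandContent"}, "value": [34, 35.5], "depthUpper": 0}'
print(json.loads(raw, object_hook=with_csv_formatting))
# {'term': {'@id': 'sandContent'}, 'value': '34;35.5', 'depthUpper': 0}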
@@ -37,15 +39,17 @@ def _is_scalar_list(value):


 def _filter_not_relevant(blank_node: dict):
-    return blank_node.get(
+    return blank_node.get("methodTier") != EmissionMethodTier.NOT_RELEVANT.value


 def _filter_emissions_not_relevant(node: dict):
     """
     Ignore all emissions where `methodTier=not relevant` so save space.
     """
-    return node | (
-
-
-
-
+    return node | (
+        {
+            key: list(filter(_filter_not_relevant, node[key]))
+            for key in ["emissions", "emissionsResourceUse"]
+            if key in node
+        }
+    )
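Emissions whose `methodTier` is "not relevant" are stripped before pivoting to keep the output small. A runnable sketch of the same filter, with the enum value inlined as a plain string (the package compares against `EmissionMethodTier.NOT_RELEVANT.value` from hestia_earth.schema):

NOT_RELEVANT = "not relevant"  # stands in for EmissionMethodTier.NOT_RELEVANT.value


def filter_emissions_not_relevant(node: dict) -> dict:
    return node | {
        key: [e for e in node[key] if e.get("methodTier") != NOT_RELEVANT]
        for key in ["emissions", "emissionsResourceUse"]
        if key in node
    }


cycle = {
    "@type": "Cycle",
    "emissions": [
        {"term": {"@id": "ch4ToAirEntericFermentation"}, "methodTier": "tier 1"},
        {"term": {"@id": "so2ToAirSoilFlux"}, "methodTier": "not relevant"},
    ],
}
print(len(filter_emissions_not_relevant(cycle)["emissions"]))  # 1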
hestia_earth/utils/pivot/pivot_csv.py
CHANGED

@@ -9,7 +9,12 @@ from flatten_json import flatten as flatten_json

 # __package__ = "hestia_earth.utils"  # required to run interactively in vscode
 from ..api import find_term_ids_by_names
-from ._shared import
+from ._shared import (
+    EXCLUDE_FIELDS,
+    EXCLUDE_PRIVATE_FIELDS,
+    _with_csv_formatting,
+    _filter_emissions_not_relevant,
+)


 # We only want to pivot array items containing blank nodes
@@ -18,11 +23,13 @@ def _get_blank_node_uniqueness_fields():
     filtered_uniqueness_fields = copy.deepcopy(UNIQUENESS_FIELDS)
     for node_type, array_fields in UNIQUENESS_FIELDS.items():
         for array_field in array_fields.keys():
-            if SORT_CONFIG[node_type][array_field][
+            if SORT_CONFIG[node_type][array_field]["type"] in NODE_TYPES:
                 del filtered_uniqueness_fields[node_type][array_field]
             # include `impactAssessment.@id` since it is not part of original uniqueness
-            if
-                filtered_uniqueness_fields[node_type][array_field].append(
+            if "impactAssessment.id" in array_fields[array_field]:
+                filtered_uniqueness_fields[node_type][array_field].append(
+                    "impactAssessment.@id"
+                )
     return filtered_uniqueness_fields

@@ -226,15 +233,17 @@ def _do_pivot(df_in, name_id_dict):
         deep_pivoted, left_index=True, right_index=True, how="outer"
     )

-    field_cols.dropna(axis=0, how=
+    field_cols.dropna(axis=0, how="all", inplace=True)

-    with_grouped_cols =
-        _get_term_index, group_keys=True
-
-
-
-
-
+    with_grouped_cols = (
+        field_cols.T.groupby(_get_term_index, group_keys=True)
+        .apply(
+            _group_by_term,
+            name_id_dict=name_id_dict,
+            uniqueness_fields=uniqueness_fields,
+        )
+        .T
+    )

     pivoted_terms = with_grouped_cols.apply(
         _pivot_row, axis=1, uniqueness_fields=uniqueness_fields
@@ -242,9 +251,13 @@ def _do_pivot(df_in, name_id_dict):

     # merge any duplicated columns caused by shuffled term positions
     # this operation coincidentally sorts the columns alphabetically
-    pivoted_terms =
-
-
+    pivoted_terms = (
+        pivoted_terms.T.groupby(
+            level=pivoted_terms.columns.nlevels - 1, group_keys=False
+        )
+        .apply(lambda term: term.bfill().iloc[0, :])
+        .T
+    )

     pivoted_terms.columns = map(
         lambda col: f"{nt_label}.{field}.{col}", pivoted_terms.columns
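The transpose/groupby chain above collapses columns that ended up with the same label, keeping the first non-null value per row. A standalone illustration of the effect, using groupby(...).first() (which keeps the first non-null value per group) rather than the bfill/iloc chain in the diff; the column name is made up:

import pandas as pd

# Two columns carry the same label after pivoting; each row has a value in only one.
df = pd.DataFrame(
    [[1.0, None], [None, 2.0]],
    columns=["emissions.ch4ToAirEntericFermentation.value"] * 2,
)

# Transpose so the duplicated labels become index entries, group them together,
# keep the first non-null value per original row, then transpose back.
merged = df.T.groupby(level=0).first().T
print(merged)
#    emissions.ch4ToAirEntericFermentation.value
# 0                                          1.0
# 1                                          2.0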
@@ -265,14 +278,18 @@ def _format_and_pivot(df_in):
     df_out = _do_pivot(df_in.copy(), name_id_dict)

     _sort_inplace(df_out)
-    df_out = df_out.astype(
+    df_out = df_out.astype("object")
     df_out.fillna("-", inplace=True)
     return df_out


 def nodes_to_df(nodes: list[dict]):
     nodes_flattened = [
-        flatten_json(
+        flatten_json(
+            dict([(_get_node_type_label(node.get("@type", node.get("type"))), node)]),
+            ".",
+        )
+        for node in nodes
     ]

     return pd.json_normalize(nodes_flattened)
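`nodes_to_df` wraps each node under its type label and flattens it into dot-separated column names before handing the list to pandas. A small sketch of that step with a made-up, heavily trimmed node:

import pandas as pd
from flatten_json import flatten as flatten_json

# Hypothetical node; real nodes are full HESTIA schema documents.
node = {"@type": "Cycle", "@id": "abc", "site": {"@type": "Site", "@id": "xyz"}}

flat = flatten_json({"cycle": node}, ".")
# {'cycle.@type': 'Cycle', 'cycle.@id': 'abc', 'cycle.site.@type': 'Site', 'cycle.site.@id': 'xyz'}

df = pd.json_normalize([flat])
print(df.columns.tolist())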
@@ -306,7 +323,7 @@ def pivot_hestia_file(hestia_file: str):
         Pandas dataframe with pivoted array terms
     """
     parsed = json.loads(hestia_file, object_hook=_with_csv_formatting)
-    nodes = parsed.get(
+    nodes = parsed.get("nodes", [parsed])
     return pivot_nodes(nodes)

hestia_earth/utils/pivot/pivot_json.py
CHANGED

@@ -6,10 +6,17 @@ from collections import defaultdict
 from copy import deepcopy

 from hestia_earth.utils.pipeline import _node_type
-from ._shared import
+from ._shared import (
+    EXCLUDE_FIELDS,
+    EXCLUDE_PRIVATE_FIELDS,
+    _with_csv_formatting,
+    _filter_emissions_not_relevant,
+)

 pivot_exclude_fields = Term().fields
-pivot_exclude_fields.update(
+pivot_exclude_fields.update(
+    {k: "" for k in EXCLUDE_FIELDS} | {k: "" for k in EXCLUDE_PRIVATE_FIELDS}
+)

 term_exclude_fields = Term().fields
 del term_exclude_fields["name"]
@@ -31,8 +38,10 @@ for node_type, array_fields in UNIQUENESS_FIELDS.items():
         if f not in ("properties.term.@id", "properties.value")
     ]
     # include `impactAssessment.@id` since it is not part of original uniqueness
-    if
-        ADAPTED_UNIQUENESS_FIELDS[node_type][array_field].append(
+    if "impactAssessment.id" in array_fields[array_field]:
+        ADAPTED_UNIQUENESS_FIELDS[node_type][array_field].append(
+            "impactAssessment.@id"
+        )


 def _combine_node_ids(nodes: list):
@@ -42,13 +51,18 @@ def _combine_node_ids(nodes: list):
 def _base_pivoted_value(key: str, value, is_top_level: bool):
     # handle list of Nodes
     return (
-
-
-
-
-
-
-
+        (
+            _combine_node_ids(value)
+            if isinstance(value[0], dict) and value[0].get("@type") in NODE_TYPES
+            else (
+                json.dumps(value, separators=(",", ":"))
+                if any([is_top_level, key in ["distribution"]])
+                else value
+            )
+        )
+        if isinstance(value, list)
+        else value
+    )


 def _do_pivot(node, parent_node_type=None, parent_field=None, level=0):  # noqa: C901
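Top-level lists (and `distribution` values) are serialized into a single compact JSON string cell rather than kept as Python lists; passing separators=(",", ":") drops the spaces json.dumps would otherwise insert:

import json

value = [0.1, 0.25, 0.65]
print(json.dumps(value))                         # [0.1, 0.25, 0.65]
print(json.dumps(value, separators=(",", ":")))  # [0.1,0.25,0.65]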
@@ -57,13 +71,15 @@ def _do_pivot(node, parent_node_type=None, parent_field=None, level=0): # noqa:
     if node_type not in ADAPTED_UNIQUENESS_FIELDS:
         return node
     pivoted_node = {
-        field: _base_pivoted_value(field, value, level==0)
+        field: _base_pivoted_value(field, value, level == 0)
         for field, value in node.items()
-        if all(
-
-
-
-
+        if all(
+            [
+                field not in ADAPTED_UNIQUENESS_FIELDS[node_type],
+                node_type != "Term" or field not in term_exclude_fields,
+                field not in EXCLUDE_PRIVATE_FIELDS,
+            ]
+        )
     }

     fields_to_pivot = [
@@ -227,7 +243,7 @@ def pivot_hestia_file(hestia_file: str):
     Pivot json array of schema-compliant nodes on 'nodes' key of unparsed json string
     """
     parsed = json.loads(hestia_file, object_hook=_with_csv_formatting)
-    return pivot_nodes(parsed.get(
+    return pivot_nodes(parsed.get("nodes", []))


 def pivot_nodes(nodes: list[dict]):