hestia-earth-utils 0.16.13.tar.gz → 0.16.15.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {hestia_earth_utils-0.16.13/hestia_earth_utils.egg-info → hestia_earth_utils-0.16.15}/PKG-INFO +1 -1
  2. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/blank_node.py +34 -16
  3. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/cycle.py +24 -5
  4. hestia_earth_utils-0.16.15/hestia_earth/utils/pivot/_shared.py +110 -0
  5. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/pivot/pivot_csv.py +3 -22
  6. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/pivot/pivot_json.py +55 -21
  7. hestia_earth_utils-0.16.15/hestia_earth/utils/version.py +1 -0
  8. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15/hestia_earth_utils.egg-info}/PKG-INFO +1 -1
  9. hestia_earth_utils-0.16.15/tests/test_calculation_status.py +40 -0
  10. hestia_earth_utils-0.16.15/tests/test_cycle.py +33 -0
  11. hestia_earth_utils-0.16.13/hestia_earth/utils/pivot/_shared.py +0 -55
  12. hestia_earth_utils-0.16.13/hestia_earth/utils/version.py +0 -1
  13. hestia_earth_utils-0.16.13/tests/test_calculation_status.py +0 -40
  14. hestia_earth_utils-0.16.13/tests/test_cycle.py +0 -18
  15. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/MANIFEST.in +0 -0
  16. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/README.md +0 -0
  17. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/bin/hestia-format-upload +0 -0
  18. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/bin/hestia-pivot-csv +0 -0
  19. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/__init__.py +0 -0
  20. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/api.py +0 -0
  21. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/calculation_status.py +0 -0
  22. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/date.py +0 -0
  23. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/descriptive_stats.py +0 -0
  24. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/emission.py +0 -0
  25. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/lookup.py +0 -0
  26. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/lookup_utils.py +0 -0
  27. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/model.py +0 -0
  28. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/pipeline.py +0 -0
  29. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/pivot/__init__.py +0 -0
  30. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/request.py +0 -0
  31. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/stats.py +0 -0
  32. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/storage/__init__.py +0 -0
  33. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/storage/_azure_client.py +0 -0
  34. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/storage/_local_client.py +0 -0
  35. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/storage/_s3_client.py +0 -0
  36. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/storage/_sns_client.py +0 -0
  37. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/table.py +0 -0
  38. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/term.py +0 -0
  39. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth/utils/tools.py +0 -0
  40. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth_utils.egg-info/SOURCES.txt +0 -0
  41. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth_utils.egg-info/dependency_links.txt +0 -0
  42. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth_utils.egg-info/requires.txt +0 -0
  43. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/hestia_earth_utils.egg-info/top_level.txt +0 -0
  44. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/setup.cfg +0 -0
  45. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/setup.py +0 -0
  46. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_api.py +0 -0
  47. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_blank_node.py +0 -0
  48. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_date.py +0 -0
  49. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_descriptive_stats.py +0 -0
  50. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_emission.py +0 -0
  51. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_lookup.py +0 -0
  52. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_lookup_utils.py +0 -0
  53. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_model.py +0 -0
  54. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_pipeline.py +0 -0
  55. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_request.py +0 -0
  56. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_stats.py +0 -0
  57. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_table.py +0 -0
  58. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_term.py +0 -0
  59. {hestia_earth_utils-0.16.13 → hestia_earth_utils-0.16.15}/tests/test_tools.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hestia_earth_utils
- Version: 0.16.13
+ Version: 0.16.15
  Summary: HESTIA's utils library
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
  Author: HESTIA Team
hestia_earth/utils/blank_node.py
@@ -186,6 +186,38 @@ def get_node_value(
  _BLANK_NODE_GROUPING_KEYS = {TermTermType.EMISSION: ["methodModel"]}


+ def _pluralize_key(key: str):
+     return key + ("" if key.endswith("s") else "s")
+
+
+ def _blank_node_ids(values: list):
+     return sorted(list(set(list(map(lambda v: v.get("@id"), values)))))
+
+
+ def _blank_node_sub_values(blank_nodes: list, key: str):
+     values = flatten(map(lambda v: v.get(key, []), blank_nodes))
+     return {_pluralize_key(key): _blank_node_ids(values)} if values else {}
+
+
+ def _blank_node_data(blank_nodes: list):
+     value = get_node_value(
+         {
+             "term": blank_nodes[0].get("term"),
+             "value": list(map(get_node_value, blank_nodes)),
+         }
+     )
+     sub_values = ["inputs", "operation", "transformation"]
+     has_cycle_value = any(
+         [
+             all([get_node_value(v) is not None, not v.get("transformation")])
+             for v in blank_nodes
+         ]
+     )
+     return {"value": value, "hasCycleValue": has_cycle_value} | reduce(
+         lambda p, c: p | _blank_node_sub_values(blank_nodes, c), sub_values, {}
+     )
+
+
  def get_blank_nodes_calculation_status(
      node: dict, list_key: str, termType: TermTermType
  ):
@@ -214,20 +246,6 @@ def get_blank_nodes_calculation_status(
      blank_nodes_by_term = group_by_keys(blank_nodes, ["term"])
      blank_nodes_grouping_keys = _BLANK_NODE_GROUPING_KEYS.get(termType) or []

-     def blank_node_data(blank_nodes: list):
-         value = get_node_value(
-             {
-                 "term": blank_nodes[0].get("term"),
-                 "value": list(map(get_node_value, blank_nodes)),
-             }
-         )
-         inputs = flatten(map(lambda v: v.get("inputs", []), blank_nodes))
-         return {"value": value} | (
-             {"inputs": sorted(list(map(lambda v: v.get("@id"), inputs)))}
-             if inputs
-             else {}
-         )
-
      def map_blank_node(term_id: str):
          values = blank_nodes_by_term.get(term_id, [])
          grouped_blank_nodes = (
@@ -239,9 +257,9 @@ def get_blank_nodes_calculation_status(
              {}
              if not values
              else (
-                 {k: blank_node_data(v) for k, v in grouped_blank_nodes.items()}
+                 {k: _blank_node_data(v) for k, v in grouped_blank_nodes.items()}
                  if grouped_blank_nodes
-                 else blank_node_data([values[0]])
+                 else _blank_node_data([values[0]])
              )
          )

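The new helpers group repeated blank nodes by term and record which inputs, operations and transformations they reference, with the keys pluralised and the ids de-duplicated and sorted. A minimal standalone sketch of that aggregation step, using hypothetical term ids and plain-Python stand-ins for the library's flatten and reduce pipeline:

from functools import reduce

# Hypothetical emission blank nodes sharing the same term (illustration only).
blank_nodes = [
    {"term": {"@id": "ch4ToAirEntericFermentation"}, "value": [1.2],
     "inputs": [{"@id": "wheatGrain"}]},
    {"term": {"@id": "ch4ToAirEntericFermentation"}, "value": [0.8],
     "inputs": [{"@id": "maizeGrain"}]},
]


def _pluralize_key(key: str):
    # "inputs" stays "inputs", "operation" becomes "operations"
    return key + ("" if key.endswith("s") else "s")


def sub_values(blank_nodes: list, key: str):
    # collect the sorted, de-duplicated @id values referenced under `key`
    values = [v for node in blank_nodes for v in node.get(key, [])]
    ids = sorted(set(v.get("@id") for v in values))
    return {_pluralize_key(key): ids} if values else {}


summary = reduce(
    lambda prev, key: prev | sub_values(blank_nodes, key),
    ["inputs", "operation", "transformation"],
    {},
)
print(summary)  # {'inputs': ['maizeGrain', 'wheatGrain']}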
hestia_earth/utils/cycle.py
@@ -4,10 +4,16 @@ from .tools import flatten
  from .blank_node import get_blank_nodes_calculation_status


- def _extend_missing_inputs(value: dict, input_ids: set):
-     included_inputs = set(flatten([v.get("inputs", []) for v in value.values()]))
-     missing_inputs = input_ids - included_inputs
-     return {"missingInputs": sorted(list(missing_inputs))} if missing_inputs else {}
+ def _extend_missing_values(
+     value: dict, all_values: set, key: str, must_have_key: bool = False
+ ):
+     included_values = set(flatten([v.get(key, []) for v in value.values()]))
+     missing_values = all_values - included_values
+     return (
+         {"missing" + key.capitalize(): sorted(list(missing_values))}
+         if all([missing_values, not must_have_key or included_values])
+         else {}
+     )


  def get_cycle_emissions_calculation_status(cycle: dict):
@@ -30,7 +36,20 @@ def get_cycle_emissions_calculation_status(cycle: dict):
          cycle, "emissions", TermTermType.EMISSION
      )
      input_ids = set([v.get("term", {}).get("@id") for v in cycle.get("inputs", [])])
+     transformation_ids = set(
+         [v.get("term", {}).get("@id") for v in cycle.get("transformations", [])]
+     )
      return {
-         k: v | (_extend_missing_inputs(v, input_ids) if "InputsProduction" in k else {})
+         k: v
+         | (
+             _extend_missing_values(v, input_ids, "inputs")
+             if "InputsProduction" in k
+             else {}
+         )
+         | (
+             _extend_missing_values(
+                 v, transformation_ids, "transformations", must_have_key=True
+             )
+         )
          for k, v in status.items()
      }
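With this change the old missing-inputs check is generalised to `missing<Key>`: missing inputs are still reported for `InputsProduction` emissions regardless, while missing transformations are only reported when the grouped emissions already reference at least one transformation (`must_have_key=True`). A self-contained illustration of that rule, re-implemented here with stdlib only and hypothetical ids:

def extend_missing_values(value: dict, all_values: set, key: str, must_have_key: bool = False):
    # ids already referenced by any of the grouped blank nodes
    included = {v for entry in value.values() for v in entry.get(key, [])}
    missing = all_values - included
    # report only when something is missing and, if must_have_key is set,
    # when the key is referenced at least once
    report = missing and (not must_have_key or included)
    return {"missing" + key.capitalize(): sorted(missing)} if report else {}


grouped = {"n2OToAirExcretaDirect": {"transformations": ["excretaManagement"]}}
all_transformations = {"excretaManagement", "compostingProcess"}

print(extend_missing_values(grouped, all_transformations, "transformations", must_have_key=True))
# {'missingTransformations': ['compostingProcess']}

print(extend_missing_values({"ch4ToAirExcreta": {}}, all_transformations, "transformations", must_have_key=True))
# {}  -> nothing reported because no transformations are referenced at all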
hestia_earth_utils-0.16.15/hestia_earth/utils/pivot/_shared.py (new file)
@@ -0,0 +1,110 @@
+ import json
+ import numpy as np
+ import pandas as pd
+ from hestia_earth.schema import SCHEMA_TYPES, NODE_TYPES, EmissionMethodTier
+ from flatten_json import flatten as flatten_json
+
+ from ..tools import list_sum
+
+
+ EXCLUDE_FIELDS = ["@type", "type", "@context"]
+ EXCLUDE_PRIVATE_FIELDS = [
+     "added",
+     "addedVersion",
+     "updated",
+     "updatedVersion",
+     "aggregatedVersion",
+     "_cache",
+ ]
+
+
+ # assuming column labels always camelCase
+ def _get_node_type_label(node_type):
+     return node_type[0].lower() + node_type[1:]
+
+
+ def _get_node_type_from_label(node_type):
+     return node_type[0].upper() + node_type[1:]
+
+
+ def _is_blank_node(data: dict):
+     node_type = data.get("@type") or data.get("type")
+     return node_type in SCHEMA_TYPES and node_type not in NODE_TYPES
+
+
+ def _with_csv_formatting(dct):
+     """
+     Use as object_hook when parsing a JSON node: json.loads(node, object_hook=_with_csv_formatting).
+     Ensures parsed JSON has field values formatted according to hestia csv conventions.
+     """
+     if "boundary" in dct:
+         dct["boundary"] = json.dumps(dct["boundary"])
+     for key, value in dct.items():
+         if _is_scalar_list(value):
+             dct[key] = ";".join([str(el) for el in value])
+     return dct
+
+
+ def _is_scalar_list(value):
+     if not isinstance(value, list):
+         return False
+     all_scalar = True
+     for element in value:
+         if not np.isscalar(element):
+             all_scalar = False
+             break
+     return all_scalar
+
+
+ def _filter_not_relevant(blank_node: dict):
+     return blank_node.get("methodTier") != EmissionMethodTier.NOT_RELEVANT.value
+
+
+ def _filter_emissions_not_relevant(node: dict):
+     """
+     Ignore all emissions where `methodTier=not relevant` to save space.
+     """
+     return node | (
+         {
+             key: list(filter(_filter_not_relevant, node[key]))
+             for key in ["emissions", "emissionsResourceUse"]
+             if key in node
+         }
+     )
+
+
+ def _filter_zero_value(blank_node: dict):
+     value = blank_node.get("value")
+     value = (
+         list_sum(blank_node.get("value"), default=-1)
+         if isinstance(value, list)
+         else value
+     )
+     return value != 0
+
+
+ def _filter_zero_values(node: dict):
+     """
+     Ignore all blank nodes where `value=0` to save space.
+     """
+     return node | (
+         {
+             key: list(filter(_filter_zero_value, value))
+             for key, value in node.items()
+             if isinstance(value, list)
+             and isinstance(value[0], dict)
+             and _is_blank_node(value[0])
+         }
+     )
+
+
+ def nodes_to_df(nodes: list[dict]):
+     nodes_flattened = [
+         flatten_json(
+             dict([(_get_node_type_label(node.get("@type", node.get("type"))), node)]),
+             ".",
+         )
+         for node in nodes
+     ]
+
+     return pd.json_normalize(nodes_flattened)
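`_with_csv_formatting` is designed as a `json.loads` object_hook: `boundary` objects are re-serialised to a single JSON string and lists of scalars are joined with `;`, matching the HESTIA CSV conventions. A rough standalone sketch of the same behaviour (simplified scalar check in place of `np.isscalar`, hypothetical input data):

import json


def with_csv_formatting(dct: dict) -> dict:
    # keep GeoJSON-like boundaries as a single JSON string cell
    if "boundary" in dct:
        dct["boundary"] = json.dumps(dct["boundary"])
    # join lists of plain scalars into a ";"-separated string
    for key, value in dct.items():
        if isinstance(value, list) and all(isinstance(v, (str, int, float, bool)) for v in value):
            dct[key] = ";".join(str(v) for v in value)
    return dct


raw = '{"term": {"@id": "wheatGrain"}, "value": [1.5, 2.0], "startDate": "2010"}'
print(json.loads(raw, object_hook=with_csv_formatting))
# {'term': {'@id': 'wheatGrain'}, 'value': '1.5;2.0', 'startDate': '2010'}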
hestia_earth/utils/pivot/pivot_csv.py
@@ -5,7 +5,6 @@ import numpy as np
  import pandas as pd
  from hestia_earth.schema import UNIQUENESS_FIELDS, Term, NODE_TYPES
  from hestia_earth.schema.utils.sort import get_sort_key, SORT_CONFIG
- from flatten_json import flatten as flatten_json

  # __package__ = "hestia_earth.utils" # required to run interactively in vscode
  from ..api import find_term_ids_by_names
@@ -14,6 +13,9 @@ from ._shared import (
      EXCLUDE_PRIVATE_FIELDS,
      _with_csv_formatting,
      _filter_emissions_not_relevant,
+     _get_node_type_label,
+     _get_node_type_from_label,
+     nodes_to_df,
  )


@@ -36,15 +38,6 @@ def _get_blank_node_uniqueness_fields():
  BLANK_NODE_UNIQUENESS_FIELDS = _get_blank_node_uniqueness_fields()


- # assuming column labels always camelCase
- def _get_node_type_label(node_type):
-     return node_type[0].lower() + node_type[1:]
-
-
- def _get_node_type_from_label(node_type):
-     return node_type[0].upper() + node_type[1:]
-
-
  def _get_names(df):
      names = []
      for node_type, array_fields in BLANK_NODE_UNIQUENESS_FIELDS.items():
@@ -283,18 +276,6 @@ def _format_and_pivot(df_in):
      return df_out


- def nodes_to_df(nodes: list[dict]):
-     nodes_flattened = [
-         flatten_json(
-             dict([(_get_node_type_label(node.get("@type", node.get("type"))), node)]),
-             ".",
-         )
-         for node in nodes
-     ]
-
-     return pd.json_normalize(nodes_flattened)
-
-
  def pivot_nodes(nodes: list[dict]):
      """
      Pivot array of nodes in dict format (e.g under the 'nodes' key of a .hestia file)
hestia_earth/utils/pivot/pivot_json.py
@@ -11,6 +11,7 @@ from ._shared import (
      EXCLUDE_PRIVATE_FIELDS,
      _with_csv_formatting,
      _filter_emissions_not_relevant,
+     _filter_zero_values,
  )

  pivot_exclude_fields = Term().fields
@@ -22,6 +23,8 @@ term_exclude_fields = Term().fields
  del term_exclude_fields["name"]
  term_exclude_fields.update({k: "" for k in EXCLUDE_PRIVATE_FIELDS})

+ include_all_unique_keys = ["emissions", "emissionsResourceUse"]
+
  # Treat properties uniqueness fields as special case for now
  PROPERTIES_VIRTUAL_UNIQUENESS_FIELD = "propertyValues"
  ADAPTED_UNIQUENESS_FIELDS = deepcopy(UNIQUENESS_FIELDS)
@@ -90,6 +93,7 @@ def _do_pivot(node, parent_node_type=None, parent_field=None, level=0): # noqa:

      # print('\n', level, 'fields_to_pivot', fields_to_pivot)
      for field, uniqueness_fields in fields_to_pivot:
+         include_all_unique_fields = field in include_all_unique_keys
          # print('\nbefore processing node field', level, field, node[field], '\n')
          # Compress lists of 'Node' nodes to dict with single @id key.
          # The compressed field matches uniqueness fields like cycle.emissions.inputs.@id.
@@ -155,14 +159,13 @@ def _do_pivot(node, parent_node_type=None, parent_field=None, level=0): # noqa:
              del term_data["combined_fields"][id_key]
          # print('combined_fields', field, term_id, term_data['combined_fields'], '\n')
          fields_to_include = {
-             field: any(
-                 by_idx[idx].get(field) != by_idx[indexes[0]].get(field)
-                 for idx in indexes
+             k: include_all_unique_fields
+             or any(
+                 by_idx[idx].get(k) != by_idx[indexes[0]].get(k) for idx in indexes
              )
-             for field in term_data["combined_fields"].keys()
-             if field in uniqueness_fields
-             or field != "value"
-             and field.split(".")[-1] not in pivot_exclude_fields
+             for k in term_data["combined_fields"].keys()
+             if k in uniqueness_fields
+             or (k != "value" and k.split(".")[-1] not in pivot_exclude_fields)
          }
          # print('fields_to_include', level, field, term_id, fields_to_include, '\n')
          for idx in indexes:
@@ -179,19 +182,23 @@ def _do_pivot(node, parent_node_type=None, parent_field=None, level=0): # noqa:
              )
          ]
          # print('distingishing_field_fields', level, field, term_id, distingishing_field_fields, '\n')
-         unanimous_fields = {
-             field: term_data["combined_fields"][field]
-             for field, not_unanimous in fields_to_include.items()
-             if field not in distingishing_field_fields
-             and not not_unanimous
-             and field is not PROPERTIES_VIRTUAL_UNIQUENESS_FIELD
-         }
          # print('unanimous_fields', level, field, term_id, unanimous_fields, '\n')
+         unanimous_fields = (
+             {}
+             if include_all_unique_fields
+             else {
+                 field: term_data["combined_fields"][field]
+                 for field, not_unanimous in fields_to_include.items()
+                 if field not in distingishing_field_fields
+                 and not not_unanimous
+                 and field is not PROPERTIES_VIRTUAL_UNIQUENESS_FIELD
+             }
+         )
          differentiated_fields = {
             field: term[field]
             for field, not_unanimous in fields_to_include.items()
             if field not in distingishing_field_fields
-            and not_unanimous
+            and (include_all_unique_fields or not_unanimous)
             and field in term
          }
          # print('differentiated_fields', level, field, term_id, differentiated_fields, '\n')
@@ -223,19 +230,35 @@ def _do_pivot(node, parent_node_type=None, parent_field=None, level=0): # noqa:
      return pivoted_node


- def pivot_node(node: dict):
+ def pivot_node(
+     node: dict,
+     include_emissions_not_relevant: bool = False,
+     include_zero_values: bool = True,
+ ):
      """
      Pivot single node in dict format parsed with object_hook=_with_csv_formatting
      """
-     return _do_pivot(_filter_emissions_not_relevant(node))
+     node = (
+         node if include_emissions_not_relevant else _filter_emissions_not_relevant(node)
+     )
+     node = node if include_zero_values else _filter_zero_values(node)
+     return _do_pivot(node)


- def pivot_json_node(json_node: str):
+ def pivot_json_node(
+     json_node: str,
+     include_emissions_not_relevant: bool = False,
+     include_zero_values: bool = True,
+ ):
      """
      Pivot single schema-compliant unparsed json string node
      """
      node = json.loads(json_node, object_hook=_with_csv_formatting)
-     return pivot_node(node)
+     return pivot_node(
+         node,
+         include_emissions_not_relevant=include_emissions_not_relevant,
+         include_zero_values=include_zero_values,
+     )


  def pivot_hestia_file(hestia_file: str):
@@ -246,8 +269,19 @@ def pivot_hestia_file(hestia_file: str):
      return pivot_nodes(parsed.get("nodes", []))


- def pivot_nodes(nodes: list[dict]):
+ def pivot_nodes(
+     nodes: list[dict],
+     include_emissions_not_relevant: bool = False,
+     include_zero_values: bool = True,
+ ):
      """
      Pivot multiple nodes in dict format parsed with object_hook=_with_csv_formatting
      """
-     return [pivot_node(node) for node in nodes]
+     return [
+         pivot_node(
+             node,
+             include_emissions_not_relevant=include_emissions_not_relevant,
+             include_zero_values=include_zero_values,
+         )
+         for node in nodes
+     ]
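The pivot entry points now accept `include_emissions_not_relevant` and `include_zero_values` flags, with defaults matching the previous behaviour (drop `not relevant` emissions, keep zero values). A usage sketch for the JSON-string entry point; the file path is hypothetical and the node is assumed to be schema-compliant:

from hestia_earth.utils.pivot.pivot_json import pivot_json_node

# "cycle.jsonld" is a hypothetical path to a single Cycle node serialised as JSON
with open("cycle.jsonld", encoding="utf-8") as f:
    raw_node = f.read()

pivoted = pivot_json_node(
    raw_node,
    include_emissions_not_relevant=False,  # keep dropping methodTier = "not relevant" emissions
    include_zero_values=False,             # additionally drop blank nodes whose value is 0
)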
hestia_earth_utils-0.16.15/hestia_earth/utils/version.py (new file)
@@ -0,0 +1 @@
+ VERSION = "0.16.15"
hestia_earth_utils.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hestia_earth_utils
- Version: 0.16.13
+ Version: 0.16.15
  Summary: HESTIA's utils library
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
  Author: HESTIA Team
hestia_earth_utils-0.16.15/tests/test_calculation_status.py (new file)
@@ -0,0 +1,40 @@
+ import os
+ import json
+
+ from tests.utils import fixtures_path
+ from hestia_earth.utils.calculation_status import _emissions_with_status
+
+ fixtures_folder = os.path.join(fixtures_path, "calculation_status")
+
+
+ def test_emissions_with_status():
+     with open(os.path.join(fixtures_folder, "nodes.json")) as f:
+         nodes = json.load(f)
+
+     result = _emissions_with_status(nodes[0])
+     assert result == {
+         "emissions-total": 195,
+         "emissions-complete": 55,
+         "emissions-incomplete": 0,
+         "emissions-missing": 140,
+         "emissions": result["emissions"],  # ignore
+     }
+
+     result = _emissions_with_status(nodes[1])
+     assert result == {
+         "emissions-total": 195,
+         "emissions-complete": 0,
+         "emissions-incomplete": 13,
+         "emissions-missing": 182,
+         "emissions": result["emissions"],  # ignore
+     }
+
+
+ # def test_get_nodes_calculations_status_dataframe():
+ #     with open(os.path.join(fixtures_folder, 'nodes.json')) as f:
+ #         nodes = json.load(f)
+
+ #     expected = open(os.path.join(fixtures_folder, 'result.csv'), 'r').read()
+
+ #     df = get_nodes_calculations_status_dataframe(nodes, file_format='csv')
+ #     assert df.to_csv(None, index=None) == expected
hestia_earth_utils-0.16.15/tests/test_cycle.py (new file)
@@ -0,0 +1,33 @@
+ import os
+ import json
+ import pytest
+
+ from tests.utils import fixtures_path
+ from hestia_earth.utils.cycle import get_cycle_emissions_calculation_status
+
+ fixtures_folder = os.path.join(fixtures_path, "blank_node", "calculation_status")
+ _folders = [
+     d
+     for d in os.listdir(fixtures_folder)
+     if os.path.isdir(os.path.join(fixtures_folder, d))
+ ]
+
+
+ @pytest.mark.parametrize("folder", _folders)
+ def test_get_cycle_emissions_calculation_status(folder: str):
+     with open(
+         os.path.join(fixtures_folder, folder, "node.jsonld"), encoding="utf-8"
+     ) as f:
+         cycle = json.load(f)
+
+     with open(
+         os.path.join(
+             fixtures_folder, folder, "emissions-emission-with-missing-inputs.json"
+         ),
+         encoding="utf-8",
+     ) as f:
+         expected = json.load(f)
+
+     result = get_cycle_emissions_calculation_status(cycle)
+     print(json.dumps(result, indent=2))
+     assert result == expected
@@ -1,55 +0,0 @@
1
- import json
2
- import numpy as np
3
- from hestia_earth.schema import EmissionMethodTier
4
-
5
-
6
- EXCLUDE_FIELDS = ["@type", "type", "@context"]
7
- EXCLUDE_PRIVATE_FIELDS = [
8
- "added",
9
- "addedVersion",
10
- "updated",
11
- "updatedVersion",
12
- "aggregatedVersion",
13
- "_cache",
14
- ]
15
-
16
-
17
- def _with_csv_formatting(dct):
18
- """
19
- Use as object_hook when parsing a JSON node: json.loads(node, object_hook=_with_csv_formatting).
20
- Ensures parsed JSON has field values formatted according to hestia csv conventions.
21
- """
22
- if "boundary" in dct:
23
- dct["boundary"] = json.dumps(dct["boundary"])
24
- for key, value in dct.items():
25
- if _is_scalar_list(value):
26
- dct[key] = ";".join([str(el) for el in value])
27
- return dct
28
-
29
-
30
- def _is_scalar_list(value):
31
- if not isinstance(value, list):
32
- return False
33
- all_scalar = True
34
- for element in value:
35
- if not np.isscalar(element):
36
- all_scalar = False
37
- break
38
- return all_scalar
39
-
40
-
41
- def _filter_not_relevant(blank_node: dict):
42
- return blank_node.get("methodTier") != EmissionMethodTier.NOT_RELEVANT.value
43
-
44
-
45
- def _filter_emissions_not_relevant(node: dict):
46
- """
47
- Ignore all emissions where `methodTier=not relevant` so save space.
48
- """
49
- return node | (
50
- {
51
- key: list(filter(_filter_not_relevant, node[key]))
52
- for key in ["emissions", "emissionsResourceUse"]
53
- if key in node
54
- }
55
- )
hestia_earth_utils-0.16.13/hestia_earth/utils/version.py (removed)
@@ -1 +0,0 @@
- VERSION = "0.16.13"
@@ -1,40 +0,0 @@
1
- # import os
2
- # import json
3
-
4
- # from tests.utils import fixtures_path
5
- # from hestia_earth.utils.calculation_status import _emissions_with_status, get_nodes_calculations_status_dataframe
6
-
7
- # fixtures_folder = os.path.join(fixtures_path, 'calculation_status')
8
-
9
-
10
- # def test_emissions_with_status():
11
- # with open(os.path.join(fixtures_folder, 'nodes.json')) as f:
12
- # nodes = json.load(f)
13
-
14
- # result = _emissions_with_status(nodes[0])
15
- # assert result == {
16
- # 'emissions-total': 193,
17
- # 'emissions-complete': 56,
18
- # 'emissions-incomplete': 1,
19
- # 'emissions-missing': 136,
20
- # 'emissions': result['emissions'] # ignore
21
- # }
22
-
23
- # result = _emissions_with_status(nodes[1])
24
- # assert result == {
25
- # 'emissions-total': 193,
26
- # 'emissions-complete': 0,
27
- # 'emissions-incomplete': 13,
28
- # 'emissions-missing': 180,
29
- # 'emissions': result['emissions'] # ignore
30
- # }
31
-
32
-
33
- # def test_get_nodes_calculations_status_dataframe():
34
- # with open(os.path.join(fixtures_folder, 'nodes.json')) as f:
35
- # nodes = json.load(f)
36
-
37
- # expected = open(os.path.join(fixtures_folder, 'result.csv'), 'r').read()
38
-
39
- # df = get_nodes_calculations_status_dataframe(nodes, file_format='csv')
40
- # assert df.to_csv(None, index=None) == expected
hestia_earth_utils-0.16.13/tests/test_cycle.py (removed)
@@ -1,18 +0,0 @@
- # import os
- # import json
-
- # from tests.utils import fixtures_path
- # from hestia_earth.utils.cycle import get_cycle_emissions_calculation_status
-
-
- # def test_get_cycle_emissions_calculation_status():
- #     folder = os.path.join(fixtures_path, 'blank_node', 'calculation_status', 'cycle')
-
- #     with open(f"{folder}/node.jsonld", encoding='utf-8') as f:
- #         cycle = json.load(f)
-
- #     with open(f"{folder}/emissions-emission-with-missing-inputs.json", encoding='utf-8') as f:
- #         expected = json.load(f)
-
- #     result = get_cycle_emissions_calculation_status(cycle)
- #     assert result == expected