hestia-earth-utils 0.15.16__py3-none-any.whl → 0.16.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hestia_earth/utils/__init__.py CHANGED
@@ -1,3 +0,0 @@
- from pkgutil import extend_path
-
- __path__ = extend_path(__path__, __name__)
hestia_earth/utils/csv_utils.py ADDED
@@ -0,0 +1,72 @@
+ import io
+ import csv
+ import re
+ import numpy as np
+
+ _MISSING_VALUE = '-'
+ _MISSING = -99999
+ _DELIMITER = ','
+ _QUOTE_CHAR = '"'
+ ENCODING = 'ISO-8859-1'
+ # default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
+ _DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
+
+
+ def is_missing_value(value): return value == _MISSING_VALUE or value == _MISSING or value == str(_MISSING)
+
+
+ def _replace_missing_values(value: str): return str(_MISSING) if str(value) == _MISSING_VALUE else value
+
+
+ def _replace_chars(value: str): return re.sub(f'[{re.escape(_DELETE_CHARS)}]', '', value.replace(' ', '_'))
+
+
+ def _text_to_csv(csv_content: str):
+     return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
+
+
+ def _csv_reader_converter(field_str_bytes):
+     field_str = field_str_bytes if isinstance(field_str_bytes, str) else field_str_bytes.decode('utf-8')
+     reader = _text_to_csv(field_str)
+
+     try:
+         return _replace_missing_values(next(reader)[0].strip())
+     except StopIteration:
+         return str(_MISSING)
+
+
+ def _get_columns(csv_content: str):
+     try:
+         reader = _text_to_csv(csv_content)
+         names = next(reader)
+         return list(map(_replace_chars, names))
+     except StopIteration:
+         return []
+
+
+ def csv_str_to_recarray(csv_content: str) -> np.recarray:
+     names = _get_columns(csv_content)
+     num_cols = len(names)
+
+     converters_dict = {
+         i: _csv_reader_converter
+         for i in range(num_cols)
+     }
+
+     # TODO: find the maximum column size instead of using 1000
+     max_size = 1000
+     return np.loadtxt(
+         io.StringIO(csv_content.strip()),
+         delimiter=_DELIMITER,
+         quotechar=_QUOTE_CHAR,
+         skiprows=1,
+         converters=converters_dict,
+         dtype=[(name, f"U{max_size}") for name in names],
+         encoding=ENCODING
+     ).view(np.recarray)
+
+
+ def csv_file_to_recarray(filepath: str):
+     with open(filepath, 'r', encoding=ENCODING) as f:
+         content = f.read()
+     return csv_str_to_recarray(content)
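
A minimal sketch of how this new module behaves, assuming the code above works as written (the CSV content and column names below are illustrative, not taken from the package's lookups):

```python
from hestia_earth.utils.csv_utils import csv_str_to_recarray, is_missing_value

# hypothetical lookup content; '-' marks a missing value
content = 'term.id,Average_price_per_tonne\nwheatGrain,180\nbarleyGrain,-\n'

rec = csv_str_to_recarray(content)
# header names are sanitised by _replace_chars: 'term.id' -> 'termid'
print(rec.termid)  # ['wheatGrain' 'barleyGrain']
# '-' cells are replaced by the str(_MISSING) sentinel '-99999'
print(is_missing_value(rec.Average_price_per_tonne[1]))  # True
```

Note the `quotechar` and per-column `converters` dict arguments to `numpy.loadtxt` used above require a recent numpy, consistent with the `numpy>=2` requirement in the new METADATA.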
hestia_earth/utils/lookup.py CHANGED
@@ -1,51 +1,16 @@
  from functools import reduce
- from io import StringIO
  from typing import Union
- import re
  import requests
- import csv
  import numpy
+ import traceback

  from .storage import _load_from_storage
  from .request import request_url, web_url
+ from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars

- DELIMITER = '\t'
- ENCODING = 'ISO-8859-1'
- GLOSSARY_FOLDER = 'glossary/lookups'
+ _GLOSSARY_FOLDER = 'glossary/lookups'
  _memory = {}
- MISSING_VALUE = '-'
- MISSING = -99999
  _INDEX_COL = 'termid'
- # default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
- _DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
-
-
- def _is_missing_value(value): return value == MISSING_VALUE or value == MISSING or value == str(MISSING)
-
-
- def _replace_missing_values(value: str): return str(MISSING) if str(value) == '-' else value
-
-
- def _rewrite_csv_file_as_tab(filepath: str):
-     with open(filepath, 'r', encoding=ENCODING) as fp:
-         reader = csv.reader(fp)
-         for row in reader:
-             yield DELIMITER.join(list(map(_replace_missing_values, row)))
-
-
- def _rewrite_csv_text_as_tab(text: str):
-     reader = csv.reader(StringIO(text))
-     for row in reader:
-         yield DELIMITER.join(list(map(_replace_missing_values, row)))
-
-
- def _recfromcsv(data): return numpy.recfromcsv(data,
-                                                missing_values=MISSING_VALUE,
-                                                filling_values=MISSING,
-                                                delimiter=DELIMITER,
-                                                encoding=ENCODING,
-                                                case_sensitive=True,
-                                                deletechars=_DELETE_CHARS)


  def _memory_wrapper(key: str, func):
@@ -70,12 +35,12 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
      numpy.recarray
          The `numpy.recarray` converted from the csv content.
      """
-     def load(): return _recfromcsv(_rewrite_csv_file_as_tab(filepath))
+     def load(): return csv_file_to_recarray(filepath)
      return _memory_wrapper(filepath, load) if keep_in_memory else load()


  def _download_lookup_data(filename: str):
-     filepath = f"{GLOSSARY_FOLDER}/{filename}"
+     filepath = f"{_GLOSSARY_FOLDER}/{filename}"

      def fallback():
          url = request_url(f"{web_url()}/{filepath}")
@@ -121,12 +86,14 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
      """
      def load():
          data = _download_lookup_data(filename)
-         rec = _recfromcsv(_rewrite_csv_text_as_tab(data)) if data else None
+         rec = csv_str_to_recarray(data) if data else None
          return (_build_index(rec) if build_index else rec) if data else None

      try:
          return _memory_wrapper(filename, load) if keep_in_memory else load()
      except Exception:
+         stack = traceback.format_exc()
+         print(stack)
          return None


@@ -144,7 +111,19 @@ def column_name(key: str):
      str
          The column name that can be used in `get_table_value`.
      """
-     return re.sub('[' + re.escape(_DELETE_CHARS) + ']', '', key.replace(' ', '_')) if key else ''
+     return _replace_chars(key) if key else ''
+
+
+ def _parse_value(value: str):
+     """ Automatically converts the value to float or bool if possible """
+     try:
+         return (
+             True if str(value).lower() == 'true' else
+             False if str(value).lower() == 'false' else
+             float(value)
+         )
+     except Exception:
+         return value


  def _get_single_table_value(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
@@ -191,7 +170,7 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
              _get_single_table_value(lookup, col_match, col_match_with, col_val) if single else
              _get_multiple_table_values(lookup, col_match, col_match_with, col_val)
          )
-         return None if _is_missing_value(value) else value
+         return None if is_missing_value(value) else _parse_value(value)
      except Exception:
          return None

@@ -251,7 +230,7 @@ def extract_grouped_data(data: str, key: str) -> str:
          **{curr.split(':')[0]: curr.split(':')[1]}
      }, data.split(';'), {}) if data is not None and isinstance(data, str) and len(data) > 1 else {}
      value = grouped_data.get(key)
-     return None if _is_missing_value(value) else value
+     return None if is_missing_value(value) else _parse_value(value)


  def extract_grouped_data_closest_date(data: str, year: int) -> str:
@@ -278,13 +257,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
          lambda prev, curr: {
              **prev,
              **{curr.split(':')[0]: curr.split(':')[1]}
-         } if len(curr) > 0 and not _is_missing_value(curr.split(':')[1]) else prev,
+         } if len(curr) > 0 and not is_missing_value(curr.split(':')[1]) else prev,
          data.split(';'),
          {}
      ) if data is not None and isinstance(data, str) and len(data) > 1 else {}
      dist_years = list(data_by_date.keys())
      closest_year = min(dist_years, key=lambda x: abs(int(x) - year)) if len(dist_years) > 0 else None
-     return None if closest_year is None else data_by_date.get(closest_year)
+     return None if closest_year is None else _parse_value(data_by_date.get(closest_year))


  def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
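
One behavioural consequence worth noting, shown as a sketch based on the code above (the grouped-data string below is made up): `get_table_value`, `extract_grouped_data` and `extract_grouped_data_closest_date` now pass results through `_parse_value`, so callers receive floats and booleans where the previous version returned raw strings.

```python
from hestia_earth.utils.lookup import extract_grouped_data, extract_grouped_data_closest_date

data = '2000:1.5;2005:-;2010:true'  # illustrative "key:value;key:value" grouped data

extract_grouped_data(data, '2000')             # 1.5  (float, previously the string '1.5')
extract_grouped_data(data, '2005')             # None ('-' is treated as missing)
extract_grouped_data_closest_date(data, 2011)  # True (bool, previously the string 'true')
```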
hestia_earth/utils/pivot/pivot_csv.py CHANGED
@@ -2,6 +2,7 @@ import copy
  import json
  import re
  import numpy as np
+ import pandas as pd
  from hestia_earth.schema import UNIQUENESS_FIELDS, Term, NODE_TYPES
  from hestia_earth.schema.utils.sort import get_sort_key, SORT_CONFIG
  from flatten_json import flatten as flatten_json
@@ -11,17 +12,6 @@ from ..api import find_term_ids_by_names
  from ._shared import EXCLUDE_FIELDS, EXCLUDE_PRIVATE_FIELDS, _with_csv_formatting, _filter_emissions_not_relevant


- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
- try:
-     import pandas as pd
-
-     version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
-     if version[0] < 1 or (version[0] == 1 and version[1] < 2):
-         raise ImportError(PANDAS_IMPORT_ERROR_MSG)
- except ImportError:
-     raise ImportError(PANDAS_IMPORT_ERROR_MSG)
-
-
  # We only want to pivot array items containing blank nodes
  # Assume these are all fields with uniqueness fields not of type Node
  def _get_blank_node_uniqueness_fields():
hestia_earth/utils/table.py CHANGED
@@ -1,22 +1,12 @@
  from functools import reduce
  import numpy as np
+ import pandas as pd
  from hestia_earth.schema import NodeType

  # __package__ = "hestia_earth.utils" # required to run interactively in vscode
  from .tools import flatten


- PANDAS_IMPORT_ERROR_MSG = "Run `pip install pandas>=1.2` to use this functionality"
- try:
-     import pandas as pd
-
-     version = [int(x) for x in pd.__version__.split('+')[0].split(".")]
-     if version[0] < 1 or (version[0] == 1 and version[1] < 2):
-         raise ImportError(PANDAS_IMPORT_ERROR_MSG)
- except ImportError:
-     raise ImportError(PANDAS_IMPORT_ERROR_MSG)
-
-
  def _replace_ids(df):
      # in columns, first letter is always lower case
      node_types = [e.value[0].lower() + e.value[1:] for e in NodeType]
@@ -74,11 +64,6 @@ def format_for_upload(filepath: str):
      pandas.DataFrame
          Formatted pandas dataframe
      """
-     try:
-         import pandas as pd
-     except ImportError:
-         raise ImportError("Run `pip install pandas~=1.2.0` to use this functionality")
-
      df = pd.read_csv(filepath, index_col=None, na_values="")

      # replace @id with id for top-level Node
hestia_earth/utils/version.py CHANGED
@@ -1 +1 @@
- VERSION = '0.15.16'
+ VERSION = '0.16.2'
hestia_earth_utils-{0.15.16 → 0.16.2}.dist-info/METADATA RENAMED
@@ -1,22 +1,35 @@
- Metadata-Version: 2.1
- Name: hestia-earth-utils
- Version: 0.15.16
+ Metadata-Version: 2.4
+ Name: hestia_earth_utils
+ Version: 0.16.2
  Summary: HESTIA's utils library
  Home-page: https://gitlab.com/hestia-earth/hestia-utils
  Author: HESTIA Team
  Author-email: guillaumeroyer.mail@gmail.com
  License: MIT
- Platform: UNKNOWN
- Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
- Classifier: Programming Language :: Python :: 3.9
- Requires-Python: >=3.9
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.12
  Description-Content-Type: text/markdown
- Requires-Dist: hestia-earth.schema>=24.3.0
+ Requires-Dist: hestia-earth-schema>=35.0.1
  Requires-Dist: requests>=2.24.0
  Requires-Dist: urllib3~=1.26.0
  Requires-Dist: python-dateutil>=2.8.1
- Requires-Dist: numpy<2,>=1.25.0
- Requires-Dist: flatten-json
+ Requires-Dist: numpy>=2
+ Requires-Dist: flatten_json
+ Provides-Extra: pivot-csv
+ Requires-Dist: pandas>=2; extra == "pivot-csv"
+ Provides-Extra: table
+ Requires-Dist: pandas>=2; extra == "table"
+ Dynamic: author
+ Dynamic: author-email
+ Dynamic: classifier
+ Dynamic: description
+ Dynamic: description-content-type
+ Dynamic: home-page
+ Dynamic: license
+ Dynamic: provides-extra
+ Dynamic: requires-dist
+ Dynamic: requires-python
+ Dynamic: summary

  # HESTIA Utils

@@ -66,5 +79,3 @@ from hestia_earth.utils.lookup import download_lookup

  df = download_lookup('crop.csv')
  ```
-
-
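
pandas thus moves from the inline version checks (removed from pivot_csv.py and table.py above) to declared optional extras resolved at install time. A hedged sketch of what this means for consumers, inferred from the metadata above rather than from the package docs:

```python
# pip install "hestia-earth-utils[pivot-csv]"  # pulls in pandas>=2 for hestia_earth.utils.pivot.pivot_csv
# pip install "hestia-earth-utils[table]"      # pulls in pandas>=2 for hestia_earth.utils.table

# Without the extra, these modules now fail at import time instead of
# raising the old custom ImportError message:
from hestia_earth.utils.table import format_for_upload  # ModuleNotFoundError: No module named 'pandas'
```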
hestia_earth_utils-{0.15.16 → 0.16.2}.dist-info/RECORD RENAMED
@@ -1,37 +1,34 @@
- hestia_earth/__init__.py,sha256=G-d438vPx7m_ks5e9XTtM3u7LDRO5dSSukibukWmyPM,56
- hestia_earth/utils/__init__.py,sha256=qEFeq3yuf3lQKVseALmL8aPM8fpCS54B_5pry00M3hk,76
+ hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
  hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
  hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
+ hestia_earth/utils/csv_utils.py,sha256=nb_ihJaTj3K5hO7cxXO1xjTLVGVX1P13m9SgquO5-XY,1990
  hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
  hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
  hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
  hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
- hestia_earth/utils/lookup.py,sha256=0RLqy3HPzkbhkRaO7fYoHU0jKhAYzI6QHMptMEbqTlg,10344
+ hestia_earth/utils/lookup.py,sha256=NoEv0Hd496I9kf-shYXYUwNabatjc_uO9Ade8J98oBI,9490
  hestia_earth/utils/lookup_utils.py,sha256=_k3RZ1pK-gw7jq8wn9HrPWfDl4FlEWRb8bXmgaARu0w,6716
  hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
  hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
  hestia_earth/utils/request.py,sha256=bu7hkWKmFdXl2_Feawiam_x32whlclA9oP0asJyC69k,626
  hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,34667
- hestia_earth/utils/table.py,sha256=RrTt-KF_QzjKiCpaAueoG6La1FG-Iusxw5NMDpoRBpQ,2861
+ hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
  hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
  hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
- hestia_earth/utils/version.py,sha256=JDQ_516e1l28tU9cgTLDX-12-rAvR8X42sQ5QDTbtTU,20
+ hestia_earth/utils/version.py,sha256=8a5HJaemwtN_jTS8fGe4SSrLufF3bwMJrcS4e735nPY,19
  hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
- hestia_earth/utils/pivot/pivot_csv.py,sha256=zaiDcig4I5lVSHPZ-2bXKKBcIRrayA0GUaw0c8H3D-w,12371
+ hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
  hestia_earth/utils/pivot/pivot_json.py,sha256=GBu5CFgCNdFjAuKGNsk2Phgds-xp4iREa5YIrplpFwA,9801
  hestia_earth/utils/storage/__init__.py,sha256=uNX6_EHWWnNUIm4Ng7L43-cQmuc6NGFAxXye85saIXQ,922
  hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0KvsXP7me1ZUBy00,1274
  hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
  hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
  hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
- hestia_earth_utils-0.15.16.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
- hestia_earth_utils-0.15.16.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
- tests/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- tests/pivot/test_pivot_csv.py,sha256=aYni7o3QDPSgtVxVCspEetotgpYHY7Lz5VHf-DR89gw,8131
- tests/pivot/test_pivot_json.py,sha256=UYTAN4AZhzVicIYsU1A2VgJcctUXohjHppg6s-pqwcg,8287
- hestia_earth_utils-0.15.16.dist-info/METADATA,sha256=nTJS2R1fi2c9Lz3R7zvRuf8HKG1n7K72KoDek_C9LpU,1758
- hestia_earth_utils-0.15.16.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
- hestia_earth_utils-0.15.16.dist-info/top_level.txt,sha256=1dqA9TqpOLTEgpqa-YBsmbCmmNU1y56AtfFGEceZ2A0,19
- hestia_earth_utils-0.15.16.dist-info/RECORD,,
+ hestia_earth_utils-0.16.2.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
+ hestia_earth_utils-0.16.2.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
+ hestia_earth_utils-0.16.2.dist-info/METADATA,sha256=mz11GR2ctUEK-YYl0x2s4f1UVFwUAKb4rt-L-MHnItA,2030
+ hestia_earth_utils-0.16.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ hestia_earth_utils-0.16.2.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
+ hestia_earth_utils-0.16.2.dist-info/RECORD,,
hestia_earth_utils-{0.15.16 → 0.16.2}.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.45.1)
+ Generator: setuptools (80.9.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

hestia_earth/__init__.py DELETED
@@ -1 +0,0 @@
- __import__('pkg_resources').declare_namespace(__name__)
tests/pivot/__init__.py DELETED
File without changes
tests/pivot/test_pivot_csv.py DELETED
@@ -1,267 +0,0 @@
- import os
- import pandas as pd
- from unittest.mock import patch, call
-
- from tests.utils import fixtures_path
- from hestia_earth.utils.pivot.pivot_csv import pivot_csv, pivot_hestia_file
-
- class_path = 'hestia_earth.utils.pivot.pivot_csv'
- fixtures_folder = os.path.join(fixtures_path, 'pivot', 'pivot_csv')
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Full tillage": "fullTillage",
-         "Diesel": "diesel",
-         "Inorganic Potassium fertiliser, unspecified (kg K2O)": "inorganicPotassiumFertiliserUnspecifiedKgK2O",
-         "Inorganic Phosphorus fertiliser, unspecified (kg P2O5)": "inorganicPhosphorusFertiliserUnspecifiedKgP2O5",
-         "Urea (kg N)": "ureaKgN",
-         "Peanut, in shell": "peanutInShell",
-     },
- )
- def test_pivot_csv_cycle(mock):
-     filepath = f"{fixtures_folder}/cycle.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/cycle-pivoted.csv", index_col=None, dtype=object
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls(
-         [
-             call(
-                 [
-                     "Diesel",
-                     "Full tillage",
-                     "Inorganic Phosphorus fertiliser, unspecified (kg P2O5)",
-                     "Inorganic Potassium fertiliser, unspecified (kg K2O)",
-                     "Peanut, in shell",
-                     "Urea (kg N)",
-                 ]
-             )
-         ]
-     )
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Eutrophication potential, excluding fate": "eutrophicationPotentialExcludingFate",
-         "GWP100": "gwp100",
-         "N2O, to air, organic fertiliser, direct": "n2OToAirOrganicFertiliserDirect",
-         "N2O, to air, inorganic fertiliser, direct": "n2OToAirInorganicFertiliserDirect",
-     },
- )
- def test_pivot_csv_impact(mock):
-     filepath = f"{fixtures_folder}/impact.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/impact-pivoted.csv", index_col=None, dtype=object
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls(
-         [
-             call(
-                 [
-                     "Eutrophication potential, excluding fate",
-                     "GWP100",
-                     "N2O, to air, inorganic fertiliser, direct",
-                     "N2O, to air, organic fertiliser, direct",
-                 ]
-             )
-         ]
-     )
-
-
- def test_pivot_csv_multinode_rows():
-     filepath = f"{fixtures_folder}/multinode-rows.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/multinode-rows-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={"Urea (kg N)": "ureaKgN"},
- )
- def test_pivot_csv_cycle_missing_ids(mock):
-     filepath = f"{fixtures_folder}/missing-ids.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/missing-ids-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls([call(["Urea (kg N)"])])
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={"Irrigated": "irrigated"},
- )
- def test_pivot_csv_empty_cells(mock):
-     filepath = f"{fixtures_folder}/empty-cells.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/empty-cells-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- def test_pivot_csv_preserves_uniqueness_fields():
-     filepath = f"{fixtures_folder}/uniqueness-fields-undifferentiating.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/uniqueness-fields-undifferentiating-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Helicopter use, operation unspecified": "helicopterUseOperationUnspecified",
-         "Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
-         "Small tractor use, operation unspecified": "smallTractorUseOperationUnspecified",
-         "Coating seeds": "coatingSeeds",
-         "Buttage of vine": "buttageOfVine",
-     },
- )
- def test_pivot_csv_uniqueness_fields_differentiating(mock):
-     filepath = f"{fixtures_folder}/uniqueness-fields-differentiating.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/uniqueness-fields-differentiating-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls(
-         [
-             call(
-                 [
-                     "Buttage of vine",
-                     "Coating seeds",
-                     "Cooling, with evaporative cooling tower",
-                     "Helicopter use, operation unspecified",
-                     "Small tractor use, operation unspecified",
-                 ]
-             )
-         ]
-     )
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
-     },
- )
- def test_pivot_csv_uniqueness_fields_non_matching(mock):
-     filepath = f"{fixtures_folder}/uniqueness-fields-non-matching.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/uniqueness-fields-non-matching-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls([call(["Cooling, with evaporative cooling tower"])])
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Nitrogen content": "nitrogenContent",
-     },
- )
- def test_pivot_csv_properties(mock):
-     filepath = f"{fixtures_folder}/properties-exception.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/properties-exception-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-     mock.assert_has_calls([call(["Nitrogen content"])])
-
-
- def test_pivot_csv_depth():
-     filepath = f"{fixtures_folder}/depth-exception.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/depth-exception-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- def test_pivot_csv_shuffled():
-     filepath = f"{fixtures_folder}/shuffled.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/shuffled-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={"Full tillage": "fullTillage", "Urea (kg N)": "ureaKgN"},
- )
- def test_pivot_csv_cycle_deep(*args):
-     filepath = f"{fixtures_folder}/deep.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/deep-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- def test_pivot_csv_non_node_arrayfields(*args):
-     filepath = f"{fixtures_folder}/non-node-arrayfields.csv"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/non-node-arrayfields-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-     df = pivot_csv(filepath)
-     assert df.to_csv() == expected.to_csv()
-
-
- @patch(
-     f"{class_path}.find_term_ids_by_names",
-     return_value={
-         "Grinding, with grinder": "grinding",
-         "Motor gasoline": "motorGasoline",
-         "Orchard density": "orchardDensity",
-     },
- )
- def test_pivot_hestia_file(*args):
-     filepath = f"{fixtures_folder}/nodes.hestia"
-     expected = pd.read_csv(
-         f"{fixtures_folder}/nodes.hestia-pivoted.csv",
-         index_col=None,
-         dtype=object,
-     )
-
-     with open(filepath) as fd:
-         hestia_file = fd.read()
-
-     df = pivot_hestia_file(hestia_file)
-     assert df.to_csv() == expected.to_csv()
tests/pivot/test_pivot_json.py DELETED
@@ -1,231 +0,0 @@
- import os
- import json
- import re
- import numpy as np
- import pandas as pd
-
- from tests.utils import fixtures_path
- from hestia_earth.utils.pivot.pivot_json import (
-     _with_csv_formatting,
-     pivot_nodes,
-     pivot_hestia_file,
- )
- from flatten_json import unflatten_list
- from hestia_earth.schema.utils.sort import SORT_CONFIG
- from hestia_earth import schema
-
- class_path = 'hestia_earth.utils.pivot.pivot_csv'
- fixtures_folder = os.path.join(fixtures_path, 'pivot', 'pivot_json')
-
- node_types = {k: getattr(schema, k)().fields for k in schema.SCHEMA_TYPES}
- name_to_ids_mapping = {
-     "Full tillage": "fullTillage",
-     "Diesel": "diesel",
-     "Motor gasoline": "motorGasoline",
-     "Inorganic Potassium fertiliser, unspecified (kg K2O)": "inorganicPotassiumFertiliserUnspecifiedKgK2O",
-     "Inorganic Phosphorus fertiliser, unspecified (kg P2O5)": "inorganicPhosphorusFertiliserUnspecifiedKgP2O5",
-     "Urea (kg N)": "ureaKgN",
-     "Peanut, in shell": "peanutInShell",
-     "Eutrophication potential, excluding fate": "eutrophicationPotentialExcludingFate",
-     "GWP100": "gwp100",
-     "N2O, to air, organic fertiliser, direct": "n2OToAirOrganicFertiliserDirect",
-     "N2O, to air, inorganic fertiliser, direct": "n2OToAirInorganicFertiliserDirect",
-     "Irrigated": "irrigated",
-     "Helicopter use, operation unspecified": "helicopterUseOperationUnspecified",
-     "Cooling, with evaporative cooling tower": "coolingWithEvaporativeCoolingTower",
-     "Small tractor use, operation unspecified": "smallTractorUseOperationUnspecified",
-     "Coating seeds": "coatingSeeds",
-     "Buttage of vine": "buttageOfVine",
-     "Nitrogen content": "nitrogenContent",
-     "Grinding, with grinder": "grinding",
-     "Orchard density": "orchardDensity",
- }
-
-
- def _get_node_type(col):
-     label = col.split(".")[0]
-     return label[0].upper() + label[1:]
-
-
- def _add_missing_fields(row, is_input, col, parent_type, prefix=""):
-     subnode_col = re.search(r"(.+?\.\d+)\.(.+)", col)
-     if not subnode_col:
-         return None
-     sub_node, deep_col = subnode_col.groups()
-     node_type = (
-         # We are not handling fields like subnode_type_A.subnode_type_B.0
-         # We are always fetching type_A in this scenario.
-         SORT_CONFIG.get(parent_type)
-         .get(sub_node.split(".")[0])
-         .get("type")
-     )
-     next_prefix = ".".join([el for el in (prefix, sub_node) if el])
-     row[f"{next_prefix}.@type"] = node_type
-     _add_missing_fields(row, is_input, deep_col, node_type, prefix=next_prefix)
-
-
- def _row_to_dict(row, is_input, parent_type):
-     row.dropna(inplace=True)
-     if is_input:
-         for col in row.index:
-             _add_missing_fields(row, is_input, col, parent_type)
-     return row.to_dict()
-
-
- def _df_to_dict(df, is_input):
-     df.index = map(lambda col: ".".join(col.split(".")[1:]), df.index)
-     df.loc["@type"] = df.name
-     dicts = df.apply(_row_to_dict, is_input=is_input, parent_type=df.name)
-     return dicts
-
-
- def _ensure_id_cols(df, name_to_ids):
-     names_df = df.filter(regex=r"\.name", axis=1)
-     for name_col in names_df.columns:
-         id_col = name_col.replace(".name", ".@id")
-         for idx, name in df[name_col].items():
-             if id_col not in df:
-                 df[id_col] = np.nan
-             if pd.isna(df.loc[idx, id_col]):
-                 df.loc[idx, id_col] = name_to_ids[name]
-
-
- def _convert_csv_to_nodes(fixture, is_input, name_to_ids):
-     """
-     Gets json fixtures or creates them from corresponding csv files.
-     Conversion for *-pivoted files is not perfect as we do not detect
-     the difference between an empty cell which should be discarded
-     (ie. header not used by a row) and a node without a value key
-     (the latter are represented in csv as field.nodeId.value = None)
-     """
-     filepath = (
-         f"{fixtures_path}/pivot/pivot_csv/{fixture}.csv"
-         if is_input
-         else f"{fixtures_path}/pivot/pivot_csv/{fixture}-pivoted.csv"
-     )
-     df = pd.read_csv(filepath, index_col=None, dtype=object)
-     df.drop(columns="-", errors="ignore", inplace=True)
-     df.replace("-", np.nan, inplace=True)
-     df.replace(
-         ["TRUE", "True", "true", "FALSE", "False", "false"],
-         [True, True, True, False, False, False],
-         inplace=True,
-     )
-     if is_input:
-         df.dropna(how="all", axis=1, inplace=True)
-     df.rename(lambda col: col.replace(".id", ".@id"), axis=1, inplace=True)
-     if is_input:
-         _ensure_id_cols(df, name_to_ids)
-     df = df.T.groupby(_get_node_type).apply(_df_to_dict, is_input)
-     nodes = [
-         node for _node_type, nodes in df.iterrows() for node in nodes if node.get("@id")
-     ]
-     return nodes
-
-
- def get_nodes_from_fixture(fixture, name_to_ids={}):
-     try:
-         with open(f"{fixtures_folder}/{fixture}.json") as file:
-             input = json.load(file, object_hook=_with_csv_formatting)["nodes"]
-         with open(f"{fixtures_folder}/{fixture}-pivoted.json") as file:
-             expected = json.load(file, object_hook=_with_csv_formatting)["nodes"]
-     except FileNotFoundError:
-         print(f"\n{fixture} not found: attempting to create from csv.\n")
-         name_to_ids.update({np.nan: np.nan})
-         input = _convert_csv_to_nodes(fixture, True, name_to_ids)
-         expected = _convert_csv_to_nodes(fixture, False, name_to_ids)
-
-         input, expected = (
-             [unflatten_list(node, ".") for node in input],
-             [unflatten_list(node, ".") for node in expected],
-         )
-         with open(f"{fixtures_folder}/{fixture}.json", "w") as file:
-             file.write(json.dumps({"nodes": input}, sort_keys=True, indent=2))
-         with open(
-             f"{fixtures_folder}/{fixture}-pivoted.json", "w"
-         ) as file:
-             file.write(json.dumps({"nodes": expected}, sort_keys=True, indent=2))
-
-     return (input, expected)
-
-
- def test_pivot_json_cycle():
-     input, expected = get_nodes_from_fixture("cycle", name_to_ids_mapping)
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_impact():
-     input, expected = get_nodes_from_fixture("impact", name_to_ids_mapping)
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_multinode_rows():
-     input, expected = get_nodes_from_fixture("multinode-rows")
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_preserves_uniqueness_fields():
-     input, expected = get_nodes_from_fixture(
-         "uniqueness-fields-undifferentiating", name_to_ids_mapping
-     )
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_uniqueness_fields_differentiating():
-     input, expected = get_nodes_from_fixture(
-         "uniqueness-fields-differentiating", name_to_ids_mapping
-     )
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- # Output differs from CSV pivoter (see https://gitlab.com/hestia-earth/hestia-utils/-/issues/32)
- def test_pivot_json_uniqueness_fields_non_matching():
-     input, expected = get_nodes_from_fixture("uniqueness-fields-non-matching", name_to_ids_mapping)
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_properties():
-     input, expected = get_nodes_from_fixture("properties-exception", name_to_ids_mapping)
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_depth():
-     input, expected = get_nodes_from_fixture("depth-exception")
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- # Output differs from CSV pivoter (see https://gitlab.com/hestia-earth/hestia-utils/-/issues/32)
- def test_pivot_json_cycle_deep():
-     input, expected = get_nodes_from_fixture("deep", name_to_ids_mapping)
-     actual = pivot_nodes(input)
-     assert expected == actual
-
-
- def test_pivot_json_node_arrayfields_merged():
-     input, expected = get_nodes_from_fixture("node-arrayfields-merged")
-     actual = pivot_nodes(input)
-
-     assert expected == actual
-
-
- def test_pivot_json_unindexed_node():
-     input, expected = get_nodes_from_fixture("unindexed-node")
-     actual = pivot_nodes(input)
-
-     assert expected == actual
-
-
- def test_pivot_hestia_file():
-     _input, expected = get_nodes_from_fixture("nodes.hestia", name_to_ids_mapping)
-     actual = pivot_hestia_file(
-         open(f"{fixtures_folder}/nodes.hestia.json", "r").read()
-     )
-     assert expected == actual