hestia-earth-models 0.57.2__py3-none-any.whl → 0.59.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hestia-earth-models might be problematic. Click here for more details.
- hestia_earth/models/cycle/aboveGroundCropResidueTotal.py +17 -12
- hestia_earth/models/cycle/excretaKgMass.py +4 -5
- hestia_earth/models/cycle/excretaKgN.py +4 -5
- hestia_earth/models/cycle/excretaKgVs.py +4 -5
- hestia_earth/models/cycle/inorganicFertiliser.py +2 -2
- hestia_earth/models/cycle/{irrigated.py → irrigatedTypeUnspecified.py} +4 -4
- hestia_earth/models/cycle/liveAnimal.py +9 -11
- hestia_earth/models/cycle/milkYield.py +154 -0
- hestia_earth/models/cycle/residueIncorporated.py +1 -1
- hestia_earth/models/cycle/utils.py +6 -0
- hestia_earth/models/emepEea2019/nh3ToAirInorganicFertiliser.py +3 -3
- hestia_earth/models/faostat2018/seed.py +2 -3
- hestia_earth/models/geospatialDatabase/clayContent.py +17 -4
- hestia_earth/models/geospatialDatabase/sandContent.py +17 -4
- hestia_earth/models/geospatialDatabase/siltContent.py +2 -2
- hestia_earth/models/impact_assessment/irrigated.py +0 -3
- hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +2 -2
- hestia_earth/models/ipcc2006/n2OToAirCropResidueDecompositionIndirect.py +2 -2
- hestia_earth/models/ipcc2006/n2OToAirExcretaDirect.py +1 -1
- hestia_earth/models/ipcc2006/n2OToAirExcretaIndirect.py +8 -4
- hestia_earth/models/ipcc2006/n2OToAirInorganicFertiliserDirect.py +4 -1
- hestia_earth/models/ipcc2006/n2OToAirInorganicFertiliserIndirect.py +1 -1
- hestia_earth/models/ipcc2006/n2OToAirOrganicFertiliserDirect.py +1 -1
- hestia_earth/models/ipcc2006/n2OToAirOrganicFertiliserIndirect.py +1 -1
- hestia_earth/models/ipcc2006/utils.py +11 -8
- hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +4 -4
- hestia_earth/models/ipcc2019/ch4ToAirFloodedRice.py +16 -7
- hestia_earth/models/ipcc2019/co2ToAirSoilCarbonStockChangeManagementChange.py +759 -0
- hestia_earth/models/ipcc2019/croppingDuration.py +12 -6
- hestia_earth/models/ipcc2019/n2OToAirCropResidueDecompositionDirect.py +5 -52
- hestia_earth/models/ipcc2019/n2OToAirInorganicFertiliserDirect.py +104 -0
- hestia_earth/models/ipcc2019/n2OToAirInorganicFertiliserIndirect.py +1 -1
- hestia_earth/models/ipcc2019/n2OToAirOrganicFertiliserDirect.py +105 -0
- hestia_earth/models/ipcc2019/n2OToAirOrganicFertiliserIndirect.py +1 -1
- hestia_earth/models/ipcc2019/no3ToGroundwaterCropResidueDecomposition.py +1 -1
- hestia_earth/models/ipcc2019/no3ToGroundwaterExcreta.py +1 -1
- hestia_earth/models/ipcc2019/no3ToGroundwaterInorganicFertiliser.py +1 -1
- hestia_earth/models/ipcc2019/no3ToGroundwaterOrganicFertiliser.py +1 -1
- hestia_earth/models/ipcc2019/organicCarbonPerHa.py +1088 -1268
- hestia_earth/models/ipcc2019/pastureGrass.py +4 -4
- hestia_earth/models/ipcc2019/utils.py +102 -1
- hestia_earth/models/koble2014/aboveGroundCropResidue.py +15 -17
- hestia_earth/models/koble2014/cropResidueManagement.py +2 -2
- hestia_earth/models/koble2014/utils.py +19 -3
- hestia_earth/models/linkedImpactAssessment/__init__.py +4 -2
- hestia_earth/models/log.py +15 -3
- hestia_earth/models/mocking/search-results.json +184 -118
- hestia_earth/models/pooreNemecek2018/excretaKgN.py +6 -7
- hestia_earth/models/pooreNemecek2018/excretaKgVs.py +7 -6
- hestia_earth/models/pooreNemecek2018/no3ToGroundwaterCropResidueDecomposition.py +3 -2
- hestia_earth/models/pooreNemecek2018/no3ToGroundwaterExcreta.py +3 -2
- hestia_earth/models/pooreNemecek2018/no3ToGroundwaterInorganicFertiliser.py +3 -2
- hestia_earth/models/pooreNemecek2018/saplings.py +0 -1
- hestia_earth/models/site/management.py +168 -0
- hestia_earth/models/site/organicCarbonPerHa.py +251 -89
- hestia_earth/models/stehfestBouwman2006/n2OToAirCropResidueDecompositionDirect.py +3 -2
- hestia_earth/models/stehfestBouwman2006/n2OToAirExcretaDirect.py +3 -2
- hestia_earth/models/stehfestBouwman2006/n2OToAirInorganicFertiliserDirect.py +3 -2
- hestia_earth/models/stehfestBouwman2006/n2OToAirOrganicFertiliserDirect.py +3 -2
- hestia_earth/models/stehfestBouwman2006/noxToAirCropResidueDecomposition.py +3 -2
- hestia_earth/models/stehfestBouwman2006/noxToAirExcreta.py +3 -2
- hestia_earth/models/stehfestBouwman2006/noxToAirInorganicFertiliser.py +3 -2
- hestia_earth/models/stehfestBouwman2006/noxToAirOrganicFertiliser.py +3 -2
- hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirCropResidueDecomposition.py +3 -2
- hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirExcreta.py +3 -2
- hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirInorganicFertiliser.py +3 -2
- hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirOrganicFertiliser.py +3 -2
- hestia_earth/models/utils/aggregated.py +1 -0
- hestia_earth/models/utils/blank_node.py +394 -72
- hestia_earth/models/utils/cropResidue.py +13 -0
- hestia_earth/models/utils/cycle.py +18 -9
- hestia_earth/models/utils/measurement.py +1 -1
- hestia_earth/models/utils/property.py +4 -4
- hestia_earth/models/utils/term.py +48 -3
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/METADATA +5 -9
- {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/RECORD +109 -97
- {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/WHEEL +1 -1
- tests/models/cycle/animal/input/test_hestiaAggregatedData.py +2 -14
- tests/models/cycle/input/test_hestiaAggregatedData.py +4 -16
- tests/models/cycle/test_coldCarcassWeightPerHead.py +1 -1
- tests/models/cycle/test_coldDressedCarcassWeightPerHead.py +1 -1
- tests/models/cycle/{test_irrigated.py → test_irrigatedTypeUnspecified.py} +1 -1
- tests/models/cycle/test_milkYield.py +58 -0
- tests/models/cycle/test_readyToCookWeightPerHead.py +1 -1
- tests/models/emepEea2019/test_nh3ToAirInorganicFertiliser.py +1 -1
- tests/models/geospatialDatabase/test_clayContent.py +9 -3
- tests/models/geospatialDatabase/test_sandContent.py +9 -3
- tests/models/ipcc2006/test_n2OToAirExcretaDirect.py +7 -2
- tests/models/ipcc2006/test_n2OToAirExcretaIndirect.py +1 -1
- tests/models/ipcc2006/test_n2OToAirInorganicFertiliserDirect.py +7 -2
- tests/models/ipcc2006/test_n2OToAirInorganicFertiliserIndirect.py +7 -2
- tests/models/ipcc2006/test_n2OToAirOrganicFertiliserDirect.py +7 -2
- tests/models/ipcc2006/test_n2OToAirOrganicFertiliserIndirect.py +7 -2
- tests/models/ipcc2019/test_ch4ToAirEntericFermentation.py +1 -1
- tests/models/ipcc2019/test_co2ToAirSoilCarbonStockChangeManagementChange.py +228 -0
- tests/models/ipcc2019/test_n2OToAirInorganicFertiliserDirect.py +74 -0
- tests/models/ipcc2019/test_n2OToAirOrganicFertiliserDirect.py +74 -0
- tests/models/ipcc2019/test_organicCarbonPerHa.py +303 -1044
- tests/models/koble2014/test_residueBurnt.py +1 -2
- tests/models/koble2014/test_residueLeftOnField.py +1 -2
- tests/models/koble2014/test_residueRemoved.py +1 -2
- tests/models/koble2014/test_utils.py +52 -0
- tests/models/site/test_management.py +117 -0
- tests/models/site/test_organicCarbonPerHa.py +51 -5
- tests/models/utils/test_blank_node.py +230 -34
- tests/models/utils/test_term.py +17 -3
- {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/top_level.txt +0 -0
|
@@ -8,6 +8,7 @@ from functools import reduce
|
|
|
8
8
|
from statistics import mode, mean
|
|
9
9
|
from typing import (
|
|
10
10
|
Any,
|
|
11
|
+
List,
|
|
11
12
|
Callable,
|
|
12
13
|
NamedTuple,
|
|
13
14
|
Optional,
|
|
@@ -23,7 +24,7 @@ from hestia_earth.utils.tools import (
|
|
|
23
24
|
)
|
|
24
25
|
|
|
25
26
|
from ..log import debugValues, log_as_table
|
|
26
|
-
from . import _filter_list_term_unit
|
|
27
|
+
from . import is_from_model, _filter_list_term_unit
|
|
27
28
|
from .constant import Units
|
|
28
29
|
from .property import get_node_property, get_node_property_value
|
|
29
30
|
from .lookup import (
|
|
@@ -108,7 +109,7 @@ def run_if_required(model: str, term_id: str, data: dict, module):
|
|
|
108
109
|
return getattr(module, 'run')(data) if is_run_required(model, _module_term_id(term_id, module), data) else []
|
|
109
110
|
|
|
110
111
|
|
|
111
|
-
def find_terms_value(nodes: list, term_id: str):
|
|
112
|
+
def find_terms_value(nodes: list, term_id: str, default: Union[int, None] = 0):
|
|
112
113
|
"""
|
|
113
114
|
Returns the sum of all blank nodes in the list which match the `Term` with the given `@id`.
|
|
114
115
|
|
|
@@ -124,7 +125,17 @@ def find_terms_value(nodes: list, term_id: str):
|
|
|
124
125
|
float
|
|
125
126
|
The total `value` as a number.
|
|
126
127
|
"""
|
|
127
|
-
return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)))
|
|
128
|
+
return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)), default)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def has_gap_filled_by_ids(nodes: list, term_ids: List[str]):
|
|
132
|
+
nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
|
|
133
|
+
return any([is_from_model(n) for n in nodes])
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def has_original_by_ids(nodes: list, term_ids: List[str]):
|
|
137
|
+
nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
|
|
138
|
+
return any([not is_from_model(n) for n in nodes])
|
|
128
139
|
|
|
129
140
|
|
|
130
141
|
def get_total_value(nodes: list):
|
|
@@ -294,32 +305,35 @@ def get_P2O5_total(nodes: list) -> list:
|
|
|
294
305
|
return get_total_value(kg_P_nodes) + get_total_value_converted(kg_N_nodes + kg_nodes, 'phosphateContentAsP2O5')
|
|
295
306
|
|
|
296
307
|
|
|
297
|
-
def convert_to_nitrogen(node: dict, model: str, term_id: str, blank_nodes: list):
|
|
308
|
+
def convert_to_nitrogen(node: dict, model: str, term_id: str, blank_nodes: list, **log_args):
|
|
298
309
|
def prop_value(input: dict):
|
|
299
|
-
value = get_node_property_value(model, input, 'nitrogenContent')
|
|
300
|
-
return value or get_node_property_value(model, input, 'crudeProteinContent', default=0) / 6.25
|
|
310
|
+
value = get_node_property_value(model, input, 'nitrogenContent', default=None, **log_args)
|
|
311
|
+
return value or get_node_property_value(model, input, 'crudeProteinContent', default=0, **log_args) / 6.25
|
|
301
312
|
|
|
302
313
|
values = [(i, prop_value(i)) for i in blank_nodes]
|
|
303
314
|
missing_nitrogen_property = [i.get('term', {}).get('@id') for i, p_value in values if not p_value]
|
|
304
315
|
|
|
305
316
|
debugValues(node, model=model, term=term_id,
|
|
306
|
-
missing_nitrogen_property=';'.join(set(missing_nitrogen_property))
|
|
317
|
+
missing_nitrogen_property=';'.join(set(missing_nitrogen_property)),
|
|
318
|
+
**log_args)
|
|
307
319
|
|
|
308
320
|
return list_sum([
|
|
309
321
|
list_sum(i.get('value', [])) * p_value for i, p_value in values if p_value is not None
|
|
310
322
|
]) if len(missing_nitrogen_property) == 0 else None
|
|
311
323
|
|
|
312
324
|
|
|
313
|
-
def convert_to_carbon(node: dict, model: str, term_id: str, blank_nodes: list):
|
|
325
|
+
def convert_to_carbon(node: dict, model: str, term_id: str, blank_nodes: list, **log_args):
|
|
314
326
|
def prop_value(input: dict):
|
|
315
|
-
value = get_node_property_value(model, input, 'carbonContent')
|
|
316
|
-
return value or
|
|
327
|
+
value = get_node_property_value(model, input, 'carbonContent', default=None, **log_args)
|
|
328
|
+
return value or \
|
|
329
|
+
get_node_property_value(model, input, 'energyContentHigherHeatingValue', default=0, **log_args) * 0.021
|
|
317
330
|
|
|
318
331
|
values = [(i, prop_value(i)) for i in blank_nodes]
|
|
319
332
|
missing_carbon_property = [i.get('term', {}).get('@id') for i, p_value in values if not p_value]
|
|
320
333
|
|
|
321
334
|
debugValues(node, model=model, term=term_id,
|
|
322
|
-
missing_carbon_property=';'.join(missing_carbon_property)
|
|
335
|
+
missing_carbon_property=';'.join(missing_carbon_property),
|
|
336
|
+
**log_args)
|
|
323
337
|
|
|
324
338
|
return list_sum([
|
|
325
339
|
list_sum(i.get('value', [])) * p_value for i, p_value in values if p_value is not None
|
|
@@ -399,7 +413,8 @@ def _retrieve_array_treatment(
|
|
|
399
413
|
def get_node_value(
|
|
400
414
|
node: dict,
|
|
401
415
|
is_larger_unit: bool = False,
|
|
402
|
-
array_treatment: Optional[ArrayTreatment] = None
|
|
416
|
+
array_treatment: Optional[ArrayTreatment] = None,
|
|
417
|
+
default: Any = 0
|
|
403
418
|
) -> Union[float, bool]:
|
|
404
419
|
"""
|
|
405
420
|
Get the value from the dictionary representing the node,
|
|
@@ -427,7 +442,7 @@ def get_node_value(
|
|
|
427
442
|
array_treatment or _retrieve_array_treatment(node, is_larger_unit=is_larger_unit)
|
|
428
443
|
)] if isinstance(value, list) and len(value) > 0 else None
|
|
429
444
|
|
|
430
|
-
return reducer(value) if reducer else value if isinstance(value, bool) else value or
|
|
445
|
+
return reducer(value) if reducer else value if isinstance(value, bool) else value or default
|
|
431
446
|
|
|
432
447
|
|
|
433
448
|
def _convert_to_set(
|
|
@@ -653,7 +668,7 @@ def cumulative_nodes_lookup_match(
|
|
|
653
668
|
)
|
|
654
669
|
|
|
655
670
|
|
|
656
|
-
# ---
|
|
671
|
+
# --- Blank Node date utils ---
|
|
657
672
|
|
|
658
673
|
|
|
659
674
|
class DatestrFormat(Enum):
|
|
@@ -670,6 +685,16 @@ class DatestrFormat(Enum):
|
|
|
670
685
|
MONTH_DAY = r"--%m-%d"
|
|
671
686
|
|
|
672
687
|
|
|
688
|
+
DATESTR_FORMAT_TO_EXPECTED_LENGTH = {
|
|
689
|
+
DatestrFormat.YEAR: len("2001"),
|
|
690
|
+
DatestrFormat.YEAR_MONTH: len("2001-01"),
|
|
691
|
+
DatestrFormat.YEAR_MONTH_DAY: len("2001-01-01"),
|
|
692
|
+
DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND: len("2001-01-01T00:00:00"),
|
|
693
|
+
DatestrFormat.MONTH: len("--01"),
|
|
694
|
+
DatestrFormat.MONTH_DAY: len("--01-01")
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
|
|
673
698
|
DatestrGapfillMode = Enum("DatestrGapfillMode", [
|
|
674
699
|
"START",
|
|
675
700
|
"END"
|
|
@@ -698,16 +723,15 @@ end : datetime
|
|
|
698
723
|
"""
|
|
699
724
|
|
|
700
725
|
|
|
701
|
-
def _check_datestr_format(datestr: str, format:
|
|
726
|
+
def _check_datestr_format(datestr: str, format: DatestrFormat) -> bool:
|
|
702
727
|
"""
|
|
703
728
|
Use `datetime.strptime` to determine if a datestr is in a particular ISO format.
|
|
704
729
|
"""
|
|
705
730
|
try:
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
)
|
|
710
|
-
return bool(datetime.strptime(str(datestr), date_format_str))
|
|
731
|
+
expected_length = DATESTR_FORMAT_TO_EXPECTED_LENGTH.get(format, 0)
|
|
732
|
+
format_str = format.value
|
|
733
|
+
parsed_datetime = datetime.strptime(datestr, format_str)
|
|
734
|
+
return bool(parsed_datetime) and len(datestr) == expected_length
|
|
711
735
|
except ValueError:
|
|
712
736
|
return False
|
|
713
737
|
|
|
@@ -771,21 +795,25 @@ def _datetime_within_range(datetime: datetime, range: DatetimeRange) -> bool:
|
|
|
771
795
|
"""
|
|
772
796
|
Determine whether or not a `datetime` falls within a `DatetimeRange`.
|
|
773
797
|
"""
|
|
774
|
-
return range.start
|
|
798
|
+
return range.start <= datetime <= range.end
|
|
775
799
|
|
|
776
800
|
|
|
777
|
-
def _datetime_range_duration(range: DatetimeRange) -> float:
|
|
801
|
+
def _datetime_range_duration(range: DatetimeRange, add_second=False) -> float:
|
|
778
802
|
"""
|
|
779
803
|
Determine the length of a `DatetimeRange` in seconds.
|
|
804
|
+
|
|
805
|
+
Option to `add_second` to account for 1 second between 23:59:59 and 00:00:00)
|
|
780
806
|
"""
|
|
781
|
-
return (range.end - range.start).total_seconds()
|
|
807
|
+
return (range.end - range.start).total_seconds() + int(add_second)
|
|
782
808
|
|
|
783
809
|
|
|
784
810
|
def _calc_datetime_range_intersection_duration(
|
|
785
|
-
range_a: DatetimeRange, range_b: DatetimeRange
|
|
811
|
+
range_a: DatetimeRange, range_b: DatetimeRange, add_second=False
|
|
786
812
|
) -> float:
|
|
787
813
|
"""
|
|
788
814
|
Determine the length of a `DatetimeRange` in seconds.
|
|
815
|
+
|
|
816
|
+
Option to `add_second` to account for 1 second between 23:59:59 and 00:00:00)
|
|
789
817
|
"""
|
|
790
818
|
latest_start = max(range_a.start, range_b.start)
|
|
791
819
|
earliest_end = min(range_a.end, range_b.end)
|
|
@@ -795,14 +823,156 @@ def _calc_datetime_range_intersection_duration(
|
|
|
795
823
|
end=earliest_end
|
|
796
824
|
)
|
|
797
825
|
|
|
826
|
+
duration = _datetime_range_duration(intersection_range)
|
|
827
|
+
|
|
798
828
|
# if less than 0 the ranges do not intersect, so return 0.
|
|
799
|
-
return
|
|
829
|
+
return (
|
|
830
|
+
_datetime_range_duration(intersection_range) + int(add_second)
|
|
831
|
+
if duration > 0 else 0
|
|
832
|
+
)
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
# --- Group nodes by year ---
|
|
836
|
+
|
|
837
|
+
|
|
838
|
+
VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR = {
|
|
839
|
+
DatestrFormat.YEAR,
|
|
840
|
+
DatestrFormat.YEAR_MONTH,
|
|
841
|
+
DatestrFormat.YEAR_MONTH_DAY,
|
|
842
|
+
DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND
|
|
843
|
+
}
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
GroupNodesByYearMode = Enum("GroupNodesByYearMode", [
|
|
847
|
+
"START_AND_END_DATE",
|
|
848
|
+
"DATES"
|
|
849
|
+
])
|
|
850
|
+
"""
|
|
851
|
+
Enum representing modes of grouping nodes by year.
|
|
852
|
+
|
|
853
|
+
Members
|
|
854
|
+
-------
|
|
855
|
+
START_AND_END_DATE
|
|
856
|
+
Use the `startDate` and `endDate` fields of the node.
|
|
857
|
+
DATES
|
|
858
|
+
Use the `dates` field of the node.
|
|
859
|
+
"""
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
def _should_run_node_by_end_date(node: dict) -> bool:
|
|
863
|
+
"""
|
|
864
|
+
Validate nodes for `group_nodes_by_year` using the "startDate" and "endDate" fields.
|
|
865
|
+
"""
|
|
866
|
+
return _get_datestr_format(node.get("endDate")) in VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR
|
|
800
867
|
|
|
801
868
|
|
|
802
|
-
def
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
869
|
+
def _should_run_node_by_dates(node: dict) -> bool:
|
|
870
|
+
"""
|
|
871
|
+
Validate nodes for `group_nodes_by_year` using the "dates" field.
|
|
872
|
+
"""
|
|
873
|
+
value = node.get("value")
|
|
874
|
+
dates = node.get("dates")
|
|
875
|
+
return (
|
|
876
|
+
value and dates and len(dates) > 0 and len(value) == len(dates)
|
|
877
|
+
and all(_get_datestr_format(datestr) in VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR for datestr in node.get("dates"))
|
|
878
|
+
)
|
|
879
|
+
|
|
880
|
+
|
|
881
|
+
GROUP_NODES_BY_YEAR_MODE_TO_SHOULD_RUN_NODE_FUNCTION = {
|
|
882
|
+
GroupNodesByYearMode.START_AND_END_DATE: _should_run_node_by_end_date,
|
|
883
|
+
GroupNodesByYearMode.DATES: _should_run_node_by_dates
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def _get_node_datetime_range_from_start_and_end_date(
|
|
888
|
+
node: dict, default_node_duration: int = 1
|
|
889
|
+
) -> Union[DatetimeRange, None]:
|
|
890
|
+
"""
|
|
891
|
+
Get the datetime range from a node's "startDate" and "endDate" fields.
|
|
892
|
+
|
|
893
|
+
If "startDate" field is not available, a start date is calculated using the end date
|
|
894
|
+
and `default_node_duration`.
|
|
895
|
+
"""
|
|
896
|
+
end = safe_parse_date(_gapfill_datestr(node.get("endDate"), DatestrGapfillMode.END))
|
|
897
|
+
start = (
|
|
898
|
+
safe_parse_date(_gapfill_datestr(node.get("startDate"), DatestrGapfillMode.START))
|
|
899
|
+
or end - relativedelta(years=default_node_duration, seconds=-1) if end else None
|
|
900
|
+
)
|
|
901
|
+
|
|
902
|
+
valid = isinstance(start, datetime) and isinstance(end, datetime)
|
|
903
|
+
return DatetimeRange(start, end) if valid else None
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
def _get_node_datetime_range_from_dates(
|
|
907
|
+
node: dict, **_
|
|
908
|
+
) -> Union[DatetimeRange, None]:
|
|
909
|
+
"""
|
|
910
|
+
Get the datetime range from a node's "dates" field.
|
|
911
|
+
"""
|
|
912
|
+
dates = node.get("dates")
|
|
913
|
+
end = max(
|
|
914
|
+
non_empty_list(
|
|
915
|
+
safe_parse_date(_gapfill_datestr(datestr, DatestrGapfillMode.END)) for datestr in dates
|
|
916
|
+
), default=None
|
|
917
|
+
)
|
|
918
|
+
start = min(
|
|
919
|
+
non_empty_list(
|
|
920
|
+
safe_parse_date(_gapfill_datestr(datestr, DatestrGapfillMode.START)) for datestr in dates
|
|
921
|
+
), default=None
|
|
922
|
+
)
|
|
923
|
+
|
|
924
|
+
valid = isinstance(start, datetime) and isinstance(end, datetime)
|
|
925
|
+
return DatetimeRange(start, end) if valid else None
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
GROUP_NODES_BY_YEAR_MODE_TO_GET_DATETIME_RANGE_FUNCTION = {
|
|
929
|
+
GroupNodesByYearMode.START_AND_END_DATE: _get_node_datetime_range_from_start_and_end_date,
|
|
930
|
+
GroupNodesByYearMode.DATES: _get_node_datetime_range_from_dates
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
def _build_time_fraction_dict(
|
|
935
|
+
group_datetime_range: DatetimeRange,
|
|
936
|
+
node_datetime_range: DatetimeRange
|
|
937
|
+
) -> dict:
|
|
938
|
+
"""
|
|
939
|
+
Build a dictionary containing fractions of the year and node duration based on datetime ranges.
|
|
940
|
+
|
|
941
|
+
This function calculates the duration of the group or year, the duration of the node, and the intersection
|
|
942
|
+
duration between the two. It then computes the fractions of the year and node duration represented by the
|
|
943
|
+
intersection. The results are returned in a dictionary.
|
|
944
|
+
|
|
945
|
+
Parameters
|
|
946
|
+
----------
|
|
947
|
+
group_datetime_range : DatetimeRange
|
|
948
|
+
The datetime range representing the entire group or year.
|
|
949
|
+
node_datetime_range : DatetimeRange
|
|
950
|
+
The datetime range representing the node.
|
|
951
|
+
|
|
952
|
+
Returns
|
|
953
|
+
-------
|
|
954
|
+
dict
|
|
955
|
+
A dictionary containing "fraction_of_group_duration" and "fraction_of_node_duration".
|
|
956
|
+
"""
|
|
957
|
+
group_duration = _datetime_range_duration(group_datetime_range, add_second=True)
|
|
958
|
+
node_duration = _datetime_range_duration(node_datetime_range, add_second=True)
|
|
959
|
+
|
|
960
|
+
intersection_duration = _calc_datetime_range_intersection_duration(
|
|
961
|
+
node_datetime_range, group_datetime_range, add_second=True
|
|
962
|
+
)
|
|
963
|
+
|
|
964
|
+
fraction_of_group_duration = intersection_duration / group_duration
|
|
965
|
+
fraction_of_node_duration = intersection_duration / node_duration
|
|
966
|
+
|
|
967
|
+
return {
|
|
968
|
+
"fraction_of_group_duration": fraction_of_group_duration,
|
|
969
|
+
"fraction_of_node_duration": fraction_of_node_duration
|
|
970
|
+
}
|
|
971
|
+
|
|
972
|
+
|
|
973
|
+
def _validate_time_fraction_dict(
|
|
974
|
+
time_fraction_dict: dict,
|
|
975
|
+
is_final_group: bool
|
|
806
976
|
) -> bool:
|
|
807
977
|
"""
|
|
808
978
|
Return `True` if the the node intersections with a year group by
|
|
@@ -814,24 +984,54 @@ def _validate_intersection_threshold(
|
|
|
814
984
|
be counted in the year group if the majority of that node takes place in
|
|
815
985
|
that year.
|
|
816
986
|
"""
|
|
817
|
-
|
|
987
|
+
FRACTION_OF_GROUP_DURATION_THRESHOLD = 0.3
|
|
818
988
|
FRACTION_OF_NODE_DURATION_THRESHOLD = 0.5
|
|
819
989
|
|
|
820
|
-
return (
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
)
|
|
990
|
+
return any([
|
|
991
|
+
time_fraction_dict["fraction_of_group_duration"] > FRACTION_OF_GROUP_DURATION_THRESHOLD,
|
|
992
|
+
time_fraction_dict["fraction_of_node_duration"] > FRACTION_OF_NODE_DURATION_THRESHOLD,
|
|
993
|
+
is_final_group and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
|
|
994
|
+
])
|
|
995
|
+
|
|
996
|
+
|
|
997
|
+
def _build_update_dict(node: dict, years: list, target_year: int) -> dict:
|
|
998
|
+
"""
|
|
999
|
+
Build an update dictionary containing values and dates from a node that fall within a given year.
|
|
1000
|
+
|
|
1001
|
+
This is only required if when `group_nodes_by_year` `mode = GroupNodesByYearMode.DATES`
|
|
1002
|
+
|
|
1003
|
+
Parameters
|
|
1004
|
+
----------
|
|
1005
|
+
node : dict
|
|
1006
|
+
The node containing values and dates.
|
|
1007
|
+
year : int
|
|
1008
|
+
The year to be matched.
|
|
1009
|
+
|
|
1010
|
+
Returns
|
|
1011
|
+
-------
|
|
1012
|
+
dict
|
|
1013
|
+
An update dictionary containing "value" and "dates" keys.
|
|
1014
|
+
"""
|
|
1015
|
+
valid_indices = {
|
|
1016
|
+
i for i, y in enumerate(years) if y == target_year
|
|
1017
|
+
}
|
|
1018
|
+
return {
|
|
1019
|
+
"value": [node.get("value")[i] for i in valid_indices],
|
|
1020
|
+
"dates": [node.get("dates")[i] for i in valid_indices]
|
|
1021
|
+
}
|
|
825
1022
|
|
|
826
1023
|
|
|
827
1024
|
def group_nodes_by_year(
|
|
828
1025
|
nodes: list[dict],
|
|
829
1026
|
default_node_duration: int = 1,
|
|
830
|
-
sort_result: bool = True
|
|
1027
|
+
sort_result: bool = True,
|
|
1028
|
+
inner_key: Union[Any, None] = None,
|
|
1029
|
+
mode: GroupNodesByYearMode = GroupNodesByYearMode.START_AND_END_DATE
|
|
831
1030
|
) -> dict[int, list[dict]]:
|
|
832
1031
|
"""
|
|
833
|
-
Group nodes by year based on their
|
|
834
|
-
using `_gapfill_datestr`
|
|
1032
|
+
Group nodes by year based on either their "startDate" and "endDate" fields or their
|
|
1033
|
+
"dates" field. Incomplete date strings are gap-filled automatically using `_gapfill_datestr`
|
|
1034
|
+
function.
|
|
835
1035
|
|
|
836
1036
|
Parameters
|
|
837
1037
|
----------
|
|
@@ -841,32 +1041,38 @@ def group_nodes_by_year(
|
|
|
841
1041
|
Default duration of a node years if start date is not available, by default 1.
|
|
842
1042
|
sort_result : bool, optional
|
|
843
1043
|
Flag to sort the result by year, by default True.
|
|
1044
|
+
inner_key: Any | None
|
|
1045
|
+
An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
|
|
1046
|
+
dictionaries together), default value: `None`.
|
|
1047
|
+
mode : GroupNodesByYearMode, optional
|
|
1048
|
+
The mode to determine how nodes are grouped by year. Options are defined in `GroupNodesByYearMode`.
|
|
844
1049
|
|
|
845
1050
|
Returns
|
|
846
1051
|
-------
|
|
847
1052
|
dict[int, list[dict]]
|
|
848
1053
|
A dictionary where keys are years and values are lists of nodes.
|
|
849
1054
|
"""
|
|
850
|
-
def group_node(groups: dict, index: int):
|
|
851
|
-
node = nodes[index]
|
|
852
1055
|
|
|
853
|
-
|
|
854
|
-
|
|
1056
|
+
should_run_node = GROUP_NODES_BY_YEAR_MODE_TO_SHOULD_RUN_NODE_FUNCTION[mode]
|
|
1057
|
+
get_node_datetime_range = GROUP_NODES_BY_YEAR_MODE_TO_GET_DATETIME_RANGE_FUNCTION[mode]
|
|
1058
|
+
|
|
1059
|
+
valid_nodes = [node for node in nodes if should_run_node(node)]
|
|
855
1060
|
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
1061
|
+
def group_node(groups: dict, index: int):
|
|
1062
|
+
node = valid_nodes[index]
|
|
1063
|
+
|
|
1064
|
+
node_datetime_range = get_node_datetime_range(
|
|
1065
|
+
node, default_node_duration=default_node_duration
|
|
861
1066
|
)
|
|
862
1067
|
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
1068
|
+
# pre-parse the "dates" field so it doesn't get re-calculated in each iteration of the for-loop
|
|
1069
|
+
years = (
|
|
1070
|
+
[safe_parse_date(datestr).year for datestr in node.get("dates", [])]
|
|
1071
|
+
if mode == GroupNodesByYearMode.DATES else []
|
|
866
1072
|
)
|
|
867
1073
|
|
|
868
|
-
|
|
869
|
-
|
|
1074
|
+
range_start = node_datetime_range.start.year if node_datetime_range else 0
|
|
1075
|
+
range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
|
|
870
1076
|
|
|
871
1077
|
for year in range(range_start, range_end):
|
|
872
1078
|
|
|
@@ -877,32 +1083,148 @@ def group_nodes_by_year(
|
|
|
877
1083
|
|
|
878
1084
|
is_final_year = _datetime_within_range(node_datetime_range.end, group_datetime_range)
|
|
879
1085
|
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
884
|
-
|
|
1086
|
+
time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
|
|
1087
|
+
update_dict = _build_update_dict(node, years, year) if mode == GroupNodesByYearMode.DATES else {}
|
|
1088
|
+
|
|
1089
|
+
should_run = (
|
|
1090
|
+
mode == GroupNodesByYearMode.DATES
|
|
1091
|
+
or _validate_time_fraction_dict(
|
|
1092
|
+
time_fraction_dict,
|
|
1093
|
+
is_final_year
|
|
1094
|
+
)
|
|
885
1095
|
)
|
|
886
1096
|
|
|
887
|
-
|
|
888
|
-
|
|
1097
|
+
should_run and groups[year].append(
|
|
1098
|
+
node | time_fraction_dict | update_dict
|
|
1099
|
+
)
|
|
889
1100
|
|
|
890
|
-
|
|
891
|
-
"fraction_of_year": fraction_of_year,
|
|
892
|
-
"fraction_of_node_duration": fraction_of_node_duration
|
|
893
|
-
}
|
|
1101
|
+
return groups
|
|
894
1102
|
|
|
895
|
-
|
|
1103
|
+
grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(list))
|
|
896
1104
|
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
1105
|
+
iterated = {
|
|
1106
|
+
year: {inner_key: group} if inner_key else group
|
|
1107
|
+
for year, group in grouped.items()
|
|
1108
|
+
}
|
|
1109
|
+
|
|
1110
|
+
return dict(sorted(iterated.items())) if sort_result else iterated
|
|
1111
|
+
|
|
1112
|
+
|
|
1113
|
+
def group_nodes_by_year_and_month(
|
|
1114
|
+
nodes: list[dict],
|
|
1115
|
+
default_node_duration: int = 1,
|
|
1116
|
+
sort_result: bool = True,
|
|
1117
|
+
inner_key: Union[Any, None] = None
|
|
1118
|
+
) -> dict[int, list[dict]]:
|
|
1119
|
+
"""
|
|
1120
|
+
Group nodes by year based on either their "startDate" and "endDate" fields. Incomplete date strings are gap-filled
|
|
1121
|
+
automatically using `_gapfill_datestr` function.
|
|
1122
|
+
|
|
1123
|
+
Returns a dict in the shape:
|
|
1124
|
+
```
|
|
1125
|
+
{
|
|
1126
|
+
year (int): {
|
|
1127
|
+
month (int): nodes (list[dict]) # for each month 1 - 12
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
```
|
|
1131
|
+
|
|
1132
|
+
Parameters
|
|
1133
|
+
----------
|
|
1134
|
+
nodes : list[dict]
|
|
1135
|
+
A list of nodes with start and end date information.
|
|
1136
|
+
default_node_duration : int, optional
|
|
1137
|
+
Default duration of a node years if start date is not available, by default 1.
|
|
1138
|
+
sort_result : bool, optional
|
|
1139
|
+
Flag to sort the result by year, by default True.
|
|
1140
|
+
inner_key: Any | None
|
|
1141
|
+
An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
|
|
1142
|
+
dictionaries together), default value: `None`.
|
|
1143
|
+
|
|
1144
|
+
Returns
|
|
1145
|
+
-------
|
|
1146
|
+
dict[int, list[dict]]
|
|
1147
|
+
A dictionary where keys are years and values are lists of nodes.
|
|
1148
|
+
"""
|
|
1149
|
+
valid_nodes = [node for node in nodes if _should_run_node_by_end_date(node)]
|
|
1150
|
+
|
|
1151
|
+
def group_node(groups: dict, index: int):
|
|
1152
|
+
node = valid_nodes[index]
|
|
1153
|
+
|
|
1154
|
+
node_datetime_range = _get_node_datetime_range_from_start_and_end_date(
|
|
1155
|
+
node, default_node_duration=default_node_duration
|
|
1156
|
+
)
|
|
1157
|
+
|
|
1158
|
+
range_start = node_datetime_range.start.year if node_datetime_range else 0
|
|
1159
|
+
range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
|
|
1160
|
+
|
|
1161
|
+
for year in range(range_start, range_end):
|
|
1162
|
+
for month in range(1, 13):
|
|
902
1163
|
|
|
903
|
-
|
|
1164
|
+
group_datetime_range = DatetimeRange(
|
|
1165
|
+
start=safe_parse_date(_gapfill_datestr(f"{year}-{month:02}", DatestrGapfillMode.START)),
|
|
1166
|
+
end=safe_parse_date(_gapfill_datestr(f"{year}-{month}", DatestrGapfillMode.END))
|
|
1167
|
+
)
|
|
1168
|
+
|
|
1169
|
+
is_final_month = _datetime_within_range(node_datetime_range.end, group_datetime_range)
|
|
1170
|
+
time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
|
|
1171
|
+
should_run = _validate_time_fraction_dict(time_fraction_dict, is_final_month)
|
|
1172
|
+
|
|
1173
|
+
should_run and groups[year][month].append(node)
|
|
904
1174
|
|
|
905
1175
|
return groups
|
|
906
1176
|
|
|
907
|
-
grouped = reduce(group_node, range(len(
|
|
908
|
-
|
|
1177
|
+
grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(lambda: defaultdict(list)))
|
|
1178
|
+
|
|
1179
|
+
iterated = {
|
|
1180
|
+
year: {inner_key: dict(group)} if inner_key else dict(group)
|
|
1181
|
+
for year, group in grouped.items()
|
|
1182
|
+
}
|
|
1183
|
+
|
|
1184
|
+
return dict(sorted(iterated.items())) if sort_result else iterated
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
# --- Group nodes by last date ---
|
|
1188
|
+
|
|
1189
|
+
|
|
1190
|
+
def _get_last_date(datestrs: list[str]) -> Optional[str]:
|
|
1191
|
+
"""
|
|
1192
|
+
Reduce a datestrs down to a single datestr by selecting the last one.
|
|
1193
|
+
|
|
1194
|
+
Parameters
|
|
1195
|
+
----------
|
|
1196
|
+
datestrs : list
|
|
1197
|
+
A list of datestrings, e.g. the value of a node's `dates` field.
|
|
1198
|
+
|
|
1199
|
+
Returns
|
|
1200
|
+
-------
|
|
1201
|
+
str | None
|
|
1202
|
+
Returns the latest datestr or `None` if no valid datestr in list.
|
|
1203
|
+
|
|
1204
|
+
"""
|
|
1205
|
+
return sorted(datestrs)[-1] if len(datestrs) > 0 else None
|
|
1206
|
+
|
|
1207
|
+
|
|
1208
|
+
def group_nodes_by_last_date(nodes: list) -> dict[str, list[dict]]:
|
|
1209
|
+
"""
|
|
1210
|
+
Group a list of nodes by the last date of their `dates` field. Nodes with no `dates` field will be sorted into
|
|
1211
|
+
the `no-dates` group.
|
|
1212
|
+
|
|
1213
|
+
Parameters
|
|
1214
|
+
----------
|
|
1215
|
+
nodes : list[dict]
|
|
1216
|
+
A list of Hestia format nodes.
|
|
1217
|
+
|
|
1218
|
+
Return
|
|
1219
|
+
------
|
|
1220
|
+
dict
|
|
1221
|
+
A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
|
|
1222
|
+
"""
|
|
1223
|
+
DEFAULT_KEY = 'no-dates'
|
|
1224
|
+
|
|
1225
|
+
def group_by(group: dict, node: dict):
|
|
1226
|
+
dates = node.get('dates', [])
|
|
1227
|
+
key = _get_last_date(dates) or DEFAULT_KEY
|
|
1228
|
+
return group | {key: group.get(key, []) + [node]}
|
|
1229
|
+
|
|
1230
|
+
return reduce(group_by, nodes, {})
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
PRODUCT_ID_TO_PRACTICES_ID = [
|
|
2
|
+
{'product': 'aboveGroundCropResidueRemoved', 'practices': ['residueRemoved']},
|
|
3
|
+
{'product': 'aboveGroundCropResidueIncorporated', 'practices': [
|
|
4
|
+
'residueIncorporated',
|
|
5
|
+
'residueIncorporatedLessThan30DaysBeforeCultivation',
|
|
6
|
+
'residueIncorporatedMoreThan30DaysBeforeCultivation'
|
|
7
|
+
]},
|
|
8
|
+
{'product': 'aboveGroundCropResidueBurnt', 'practices': ['residueBurnt']},
|
|
9
|
+
{'product': 'aboveGroundCropResidueLeftOnField', 'practices': ['residueLeftOnField']}
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def crop_residue_product_ids(): return [v.get('product') for v in PRODUCT_ID_TO_PRACTICES_ID]
|