hestia-earth-models 0.57.2__py3-none-any.whl → 0.59.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of hestia-earth-models might be problematic. Click here for more details.

Files changed (109) hide show
  1. hestia_earth/models/cycle/aboveGroundCropResidueTotal.py +17 -12
  2. hestia_earth/models/cycle/excretaKgMass.py +4 -5
  3. hestia_earth/models/cycle/excretaKgN.py +4 -5
  4. hestia_earth/models/cycle/excretaKgVs.py +4 -5
  5. hestia_earth/models/cycle/inorganicFertiliser.py +2 -2
  6. hestia_earth/models/cycle/{irrigated.py → irrigatedTypeUnspecified.py} +4 -4
  7. hestia_earth/models/cycle/liveAnimal.py +9 -11
  8. hestia_earth/models/cycle/milkYield.py +154 -0
  9. hestia_earth/models/cycle/residueIncorporated.py +1 -1
  10. hestia_earth/models/cycle/utils.py +6 -0
  11. hestia_earth/models/emepEea2019/nh3ToAirInorganicFertiliser.py +3 -3
  12. hestia_earth/models/faostat2018/seed.py +2 -3
  13. hestia_earth/models/geospatialDatabase/clayContent.py +17 -4
  14. hestia_earth/models/geospatialDatabase/sandContent.py +17 -4
  15. hestia_earth/models/geospatialDatabase/siltContent.py +2 -2
  16. hestia_earth/models/impact_assessment/irrigated.py +0 -3
  17. hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +2 -2
  18. hestia_earth/models/ipcc2006/n2OToAirCropResidueDecompositionIndirect.py +2 -2
  19. hestia_earth/models/ipcc2006/n2OToAirExcretaDirect.py +1 -1
  20. hestia_earth/models/ipcc2006/n2OToAirExcretaIndirect.py +8 -4
  21. hestia_earth/models/ipcc2006/n2OToAirInorganicFertiliserDirect.py +4 -1
  22. hestia_earth/models/ipcc2006/n2OToAirInorganicFertiliserIndirect.py +1 -1
  23. hestia_earth/models/ipcc2006/n2OToAirOrganicFertiliserDirect.py +1 -1
  24. hestia_earth/models/ipcc2006/n2OToAirOrganicFertiliserIndirect.py +1 -1
  25. hestia_earth/models/ipcc2006/utils.py +11 -8
  26. hestia_earth/models/ipcc2019/ch4ToAirEntericFermentation.py +4 -4
  27. hestia_earth/models/ipcc2019/ch4ToAirFloodedRice.py +16 -7
  28. hestia_earth/models/ipcc2019/co2ToAirSoilCarbonStockChangeManagementChange.py +759 -0
  29. hestia_earth/models/ipcc2019/croppingDuration.py +12 -6
  30. hestia_earth/models/ipcc2019/n2OToAirCropResidueDecompositionDirect.py +5 -52
  31. hestia_earth/models/ipcc2019/n2OToAirInorganicFertiliserDirect.py +104 -0
  32. hestia_earth/models/ipcc2019/n2OToAirInorganicFertiliserIndirect.py +1 -1
  33. hestia_earth/models/ipcc2019/n2OToAirOrganicFertiliserDirect.py +105 -0
  34. hestia_earth/models/ipcc2019/n2OToAirOrganicFertiliserIndirect.py +1 -1
  35. hestia_earth/models/ipcc2019/no3ToGroundwaterCropResidueDecomposition.py +1 -1
  36. hestia_earth/models/ipcc2019/no3ToGroundwaterExcreta.py +1 -1
  37. hestia_earth/models/ipcc2019/no3ToGroundwaterInorganicFertiliser.py +1 -1
  38. hestia_earth/models/ipcc2019/no3ToGroundwaterOrganicFertiliser.py +1 -1
  39. hestia_earth/models/ipcc2019/organicCarbonPerHa.py +1088 -1268
  40. hestia_earth/models/ipcc2019/pastureGrass.py +4 -4
  41. hestia_earth/models/ipcc2019/utils.py +102 -1
  42. hestia_earth/models/koble2014/aboveGroundCropResidue.py +15 -17
  43. hestia_earth/models/koble2014/cropResidueManagement.py +2 -2
  44. hestia_earth/models/koble2014/utils.py +19 -3
  45. hestia_earth/models/linkedImpactAssessment/__init__.py +4 -2
  46. hestia_earth/models/log.py +15 -3
  47. hestia_earth/models/mocking/search-results.json +184 -118
  48. hestia_earth/models/pooreNemecek2018/excretaKgN.py +6 -7
  49. hestia_earth/models/pooreNemecek2018/excretaKgVs.py +7 -6
  50. hestia_earth/models/pooreNemecek2018/no3ToGroundwaterCropResidueDecomposition.py +3 -2
  51. hestia_earth/models/pooreNemecek2018/no3ToGroundwaterExcreta.py +3 -2
  52. hestia_earth/models/pooreNemecek2018/no3ToGroundwaterInorganicFertiliser.py +3 -2
  53. hestia_earth/models/pooreNemecek2018/saplings.py +0 -1
  54. hestia_earth/models/site/management.py +168 -0
  55. hestia_earth/models/site/organicCarbonPerHa.py +251 -89
  56. hestia_earth/models/stehfestBouwman2006/n2OToAirCropResidueDecompositionDirect.py +3 -2
  57. hestia_earth/models/stehfestBouwman2006/n2OToAirExcretaDirect.py +3 -2
  58. hestia_earth/models/stehfestBouwman2006/n2OToAirInorganicFertiliserDirect.py +3 -2
  59. hestia_earth/models/stehfestBouwman2006/n2OToAirOrganicFertiliserDirect.py +3 -2
  60. hestia_earth/models/stehfestBouwman2006/noxToAirCropResidueDecomposition.py +3 -2
  61. hestia_earth/models/stehfestBouwman2006/noxToAirExcreta.py +3 -2
  62. hestia_earth/models/stehfestBouwman2006/noxToAirInorganicFertiliser.py +3 -2
  63. hestia_earth/models/stehfestBouwman2006/noxToAirOrganicFertiliser.py +3 -2
  64. hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirCropResidueDecomposition.py +3 -2
  65. hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirExcreta.py +3 -2
  66. hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirInorganicFertiliser.py +3 -2
  67. hestia_earth/models/stehfestBouwman2006GisImplementation/noxToAirOrganicFertiliser.py +3 -2
  68. hestia_earth/models/utils/aggregated.py +1 -0
  69. hestia_earth/models/utils/blank_node.py +394 -72
  70. hestia_earth/models/utils/cropResidue.py +13 -0
  71. hestia_earth/models/utils/cycle.py +18 -9
  72. hestia_earth/models/utils/measurement.py +1 -1
  73. hestia_earth/models/utils/property.py +4 -4
  74. hestia_earth/models/utils/term.py +48 -3
  75. hestia_earth/models/version.py +1 -1
  76. {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/METADATA +5 -9
  77. {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/RECORD +109 -97
  78. {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/WHEEL +1 -1
  79. tests/models/cycle/animal/input/test_hestiaAggregatedData.py +2 -14
  80. tests/models/cycle/input/test_hestiaAggregatedData.py +4 -16
  81. tests/models/cycle/test_coldCarcassWeightPerHead.py +1 -1
  82. tests/models/cycle/test_coldDressedCarcassWeightPerHead.py +1 -1
  83. tests/models/cycle/{test_irrigated.py → test_irrigatedTypeUnspecified.py} +1 -1
  84. tests/models/cycle/test_milkYield.py +58 -0
  85. tests/models/cycle/test_readyToCookWeightPerHead.py +1 -1
  86. tests/models/emepEea2019/test_nh3ToAirInorganicFertiliser.py +1 -1
  87. tests/models/geospatialDatabase/test_clayContent.py +9 -3
  88. tests/models/geospatialDatabase/test_sandContent.py +9 -3
  89. tests/models/ipcc2006/test_n2OToAirExcretaDirect.py +7 -2
  90. tests/models/ipcc2006/test_n2OToAirExcretaIndirect.py +1 -1
  91. tests/models/ipcc2006/test_n2OToAirInorganicFertiliserDirect.py +7 -2
  92. tests/models/ipcc2006/test_n2OToAirInorganicFertiliserIndirect.py +7 -2
  93. tests/models/ipcc2006/test_n2OToAirOrganicFertiliserDirect.py +7 -2
  94. tests/models/ipcc2006/test_n2OToAirOrganicFertiliserIndirect.py +7 -2
  95. tests/models/ipcc2019/test_ch4ToAirEntericFermentation.py +1 -1
  96. tests/models/ipcc2019/test_co2ToAirSoilCarbonStockChangeManagementChange.py +228 -0
  97. tests/models/ipcc2019/test_n2OToAirInorganicFertiliserDirect.py +74 -0
  98. tests/models/ipcc2019/test_n2OToAirOrganicFertiliserDirect.py +74 -0
  99. tests/models/ipcc2019/test_organicCarbonPerHa.py +303 -1044
  100. tests/models/koble2014/test_residueBurnt.py +1 -2
  101. tests/models/koble2014/test_residueLeftOnField.py +1 -2
  102. tests/models/koble2014/test_residueRemoved.py +1 -2
  103. tests/models/koble2014/test_utils.py +52 -0
  104. tests/models/site/test_management.py +117 -0
  105. tests/models/site/test_organicCarbonPerHa.py +51 -5
  106. tests/models/utils/test_blank_node.py +230 -34
  107. tests/models/utils/test_term.py +17 -3
  108. {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/LICENSE +0 -0
  109. {hestia_earth_models-0.57.2.dist-info → hestia_earth_models-0.59.0.dist-info}/top_level.txt +0 -0
@@ -8,6 +8,7 @@ from functools import reduce
8
8
  from statistics import mode, mean
9
9
  from typing import (
10
10
  Any,
11
+ List,
11
12
  Callable,
12
13
  NamedTuple,
13
14
  Optional,
@@ -23,7 +24,7 @@ from hestia_earth.utils.tools import (
23
24
  )
24
25
 
25
26
  from ..log import debugValues, log_as_table
26
- from . import _filter_list_term_unit
27
+ from . import is_from_model, _filter_list_term_unit
27
28
  from .constant import Units
28
29
  from .property import get_node_property, get_node_property_value
29
30
  from .lookup import (
@@ -108,7 +109,7 @@ def run_if_required(model: str, term_id: str, data: dict, module):
108
109
  return getattr(module, 'run')(data) if is_run_required(model, _module_term_id(term_id, module), data) else []
109
110
 
110
111
 
111
- def find_terms_value(nodes: list, term_id: str):
112
+ def find_terms_value(nodes: list, term_id: str, default: Union[int, None] = 0):
112
113
  """
113
114
  Returns the sum of all blank nodes in the list which match the `Term` with the given `@id`.
114
115
 
@@ -124,7 +125,17 @@ def find_terms_value(nodes: list, term_id: str):
124
125
  float
125
126
  The total `value` as a number.
126
127
  """
127
- return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)))
128
+ return list_sum(get_total_value(filter(lambda node: node.get('term', {}).get('@id') == term_id, nodes)), default)
129
+
130
+
131
+ def has_gap_filled_by_ids(nodes: list, term_ids: List[str]):
132
+ nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
133
+ return any([is_from_model(n) for n in nodes])
134
+
135
+
136
+ def has_original_by_ids(nodes: list, term_ids: List[str]):
137
+ nodes = [n for n in nodes if n.get('term', {}).get('@id') in term_ids]
138
+ return any([not is_from_model(n) for n in nodes])
128
139
 
129
140
 
130
141
  def get_total_value(nodes: list):
@@ -294,32 +305,35 @@ def get_P2O5_total(nodes: list) -> list:
294
305
  return get_total_value(kg_P_nodes) + get_total_value_converted(kg_N_nodes + kg_nodes, 'phosphateContentAsP2O5')
295
306
 
296
307
 
297
- def convert_to_nitrogen(node: dict, model: str, term_id: str, blank_nodes: list):
308
+ def convert_to_nitrogen(node: dict, model: str, term_id: str, blank_nodes: list, **log_args):
298
309
  def prop_value(input: dict):
299
- value = get_node_property_value(model, input, 'nitrogenContent')
300
- return value or get_node_property_value(model, input, 'crudeProteinContent', default=0) / 6.25
310
+ value = get_node_property_value(model, input, 'nitrogenContent', default=None, **log_args)
311
+ return value or get_node_property_value(model, input, 'crudeProteinContent', default=0, **log_args) / 6.25
301
312
 
302
313
  values = [(i, prop_value(i)) for i in blank_nodes]
303
314
  missing_nitrogen_property = [i.get('term', {}).get('@id') for i, p_value in values if not p_value]
304
315
 
305
316
  debugValues(node, model=model, term=term_id,
306
- missing_nitrogen_property=';'.join(set(missing_nitrogen_property)))
317
+ missing_nitrogen_property=';'.join(set(missing_nitrogen_property)),
318
+ **log_args)
307
319
 
308
320
  return list_sum([
309
321
  list_sum(i.get('value', [])) * p_value for i, p_value in values if p_value is not None
310
322
  ]) if len(missing_nitrogen_property) == 0 else None
311
323
 
312
324
 
313
- def convert_to_carbon(node: dict, model: str, term_id: str, blank_nodes: list):
325
+ def convert_to_carbon(node: dict, model: str, term_id: str, blank_nodes: list, **log_args):
314
326
  def prop_value(input: dict):
315
- value = get_node_property_value(model, input, 'carbonContent')
316
- return value or get_node_property_value(model, input, 'energyContentHigherHeatingValue', default=0) * 0.021
327
+ value = get_node_property_value(model, input, 'carbonContent', default=None, **log_args)
328
+ return value or \
329
+ get_node_property_value(model, input, 'energyContentHigherHeatingValue', default=0, **log_args) * 0.021
317
330
 
318
331
  values = [(i, prop_value(i)) for i in blank_nodes]
319
332
  missing_carbon_property = [i.get('term', {}).get('@id') for i, p_value in values if not p_value]
320
333
 
321
334
  debugValues(node, model=model, term=term_id,
322
- missing_carbon_property=';'.join(missing_carbon_property))
335
+ missing_carbon_property=';'.join(missing_carbon_property),
336
+ **log_args)
323
337
 
324
338
  return list_sum([
325
339
  list_sum(i.get('value', [])) * p_value for i, p_value in values if p_value is not None
@@ -399,7 +413,8 @@ def _retrieve_array_treatment(
399
413
  def get_node_value(
400
414
  node: dict,
401
415
  is_larger_unit: bool = False,
402
- array_treatment: Optional[ArrayTreatment] = None
416
+ array_treatment: Optional[ArrayTreatment] = None,
417
+ default: Any = 0
403
418
  ) -> Union[float, bool]:
404
419
  """
405
420
  Get the value from the dictionary representing the node,
@@ -427,7 +442,7 @@ def get_node_value(
427
442
  array_treatment or _retrieve_array_treatment(node, is_larger_unit=is_larger_unit)
428
443
  )] if isinstance(value, list) and len(value) > 0 else None
429
444
 
430
- return reducer(value) if reducer else value if isinstance(value, bool) else value or 0
445
+ return reducer(value) if reducer else value if isinstance(value, bool) else value or default
431
446
 
432
447
 
433
448
  def _convert_to_set(
@@ -653,7 +668,7 @@ def cumulative_nodes_lookup_match(
653
668
  )
654
669
 
655
670
 
656
- # --- Group nodes by year ---
671
+ # --- Blank Node date utils ---
657
672
 
658
673
 
659
674
  class DatestrFormat(Enum):
@@ -670,6 +685,16 @@ class DatestrFormat(Enum):
670
685
  MONTH_DAY = r"--%m-%d"
671
686
 
672
687
 
688
+ DATESTR_FORMAT_TO_EXPECTED_LENGTH = {
689
+ DatestrFormat.YEAR: len("2001"),
690
+ DatestrFormat.YEAR_MONTH: len("2001-01"),
691
+ DatestrFormat.YEAR_MONTH_DAY: len("2001-01-01"),
692
+ DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND: len("2001-01-01T00:00:00"),
693
+ DatestrFormat.MONTH: len("--01"),
694
+ DatestrFormat.MONTH_DAY: len("--01-01")
695
+ }
696
+
697
+
673
698
  DatestrGapfillMode = Enum("DatestrGapfillMode", [
674
699
  "START",
675
700
  "END"
@@ -698,16 +723,15 @@ end : datetime
698
723
  """
699
724
 
700
725
 
701
- def _check_datestr_format(datestr: str, format: Union[DatestrFormat, str]) -> bool:
726
+ def _check_datestr_format(datestr: str, format: DatestrFormat) -> bool:
702
727
  """
703
728
  Use `datetime.strptime` to determine if a datestr is in a particular ISO format.
704
729
  """
705
730
  try:
706
- date_format_str = (
707
- format.value if isinstance(format, DatestrFormat)
708
- else str(format)
709
- )
710
- return bool(datetime.strptime(str(datestr), date_format_str))
731
+ expected_length = DATESTR_FORMAT_TO_EXPECTED_LENGTH.get(format, 0)
732
+ format_str = format.value
733
+ parsed_datetime = datetime.strptime(datestr, format_str)
734
+ return bool(parsed_datetime) and len(datestr) == expected_length
711
735
  except ValueError:
712
736
  return False
713
737
 
@@ -771,21 +795,25 @@ def _datetime_within_range(datetime: datetime, range: DatetimeRange) -> bool:
771
795
  """
772
796
  Determine whether or not a `datetime` falls within a `DatetimeRange`.
773
797
  """
774
- return range.start < datetime < range.end
798
+ return range.start <= datetime <= range.end
775
799
 
776
800
 
777
- def _datetime_range_duration(range: DatetimeRange) -> float:
801
+ def _datetime_range_duration(range: DatetimeRange, add_second=False) -> float:
778
802
  """
779
803
  Determine the length of a `DatetimeRange` in seconds.
804
+
805
+ Option to `add_second` (to account for the 1 second between 23:59:59 and 00:00:00).
780
806
  """
781
- return (range.end - range.start).total_seconds()
807
+ return (range.end - range.start).total_seconds() + int(add_second)
782
808
 
783
809
 
784
810
  def _calc_datetime_range_intersection_duration(
785
- range_a: DatetimeRange, range_b: DatetimeRange
811
+ range_a: DatetimeRange, range_b: DatetimeRange, add_second=False
786
812
  ) -> float:
787
813
  """
788
814
  Determine the length, in seconds, of the intersection of two `DatetimeRange`s.
815
+
816
+ Option to `add_second` (to account for the 1 second between 23:59:59 and 00:00:00).
789
817
  """
790
818
  latest_start = max(range_a.start, range_b.start)
791
819
  earliest_end = min(range_a.end, range_b.end)
@@ -795,14 +823,156 @@ def _calc_datetime_range_intersection_duration(
795
823
  end=earliest_end
796
824
  )
797
825
 
826
+ duration = _datetime_range_duration(intersection_range)
827
+
798
828
  # if less than 0 the ranges do not intersect, so return 0.
799
- return max(0, _datetime_range_duration(intersection_range))
829
+ return (
830
+ _datetime_range_duration(intersection_range) + int(add_second)
831
+ if duration > 0 else 0
832
+ )
833
+
834
+
835
+ # --- Group nodes by year ---
836
+
837
+
838
+ VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR = {
839
+ DatestrFormat.YEAR,
840
+ DatestrFormat.YEAR_MONTH,
841
+ DatestrFormat.YEAR_MONTH_DAY,
842
+ DatestrFormat.YEAR_MONTH_DAY_HOUR_MINUTE_SECOND
843
+ }
844
+
845
+
846
+ GroupNodesByYearMode = Enum("GroupNodesByYearMode", [
847
+ "START_AND_END_DATE",
848
+ "DATES"
849
+ ])
850
+ """
851
+ Enum representing modes of grouping nodes by year.
852
+
853
+ Members
854
+ -------
855
+ START_AND_END_DATE
856
+ Use the `startDate` and `endDate` fields of the node.
857
+ DATES
858
+ Use the `dates` field of the node.
859
+ """
860
+
861
+
862
+ def _should_run_node_by_end_date(node: dict) -> bool:
863
+ """
864
+ Validate nodes for `group_nodes_by_year` using the "startDate" and "endDate" fields.
865
+ """
866
+ return _get_datestr_format(node.get("endDate")) in VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR
800
867
 
801
868
 
802
- def _validate_intersection_threshold(
803
- fraction_of_year: float,
804
- fraction_of_node_duration: float,
805
- is_final_year: bool
869
+ def _should_run_node_by_dates(node: dict) -> bool:
870
+ """
871
+ Validate nodes for `group_nodes_by_year` using the "dates" field.
872
+ """
873
+ value = node.get("value")
874
+ dates = node.get("dates")
875
+ return (
876
+ value and dates and len(dates) > 0 and len(value) == len(dates)
877
+ and all(_get_datestr_format(datestr) in VALID_DATE_FORMATS_GROUP_NODES_BY_YEAR for datestr in node.get("dates"))
878
+ )
879
+
880
+
881
+ GROUP_NODES_BY_YEAR_MODE_TO_SHOULD_RUN_NODE_FUNCTION = {
882
+ GroupNodesByYearMode.START_AND_END_DATE: _should_run_node_by_end_date,
883
+ GroupNodesByYearMode.DATES: _should_run_node_by_dates
884
+ }
885
+
886
+
887
+ def _get_node_datetime_range_from_start_and_end_date(
888
+ node: dict, default_node_duration: int = 1
889
+ ) -> Union[DatetimeRange, None]:
890
+ """
891
+ Get the datetime range from a node's "startDate" and "endDate" fields.
892
+
893
+ If "startDate" field is not available, a start date is calculated using the end date
894
+ and `default_node_duration`.
895
+ """
896
+ end = safe_parse_date(_gapfill_datestr(node.get("endDate"), DatestrGapfillMode.END))
897
+ start = (
898
+ safe_parse_date(_gapfill_datestr(node.get("startDate"), DatestrGapfillMode.START))
899
+ or end - relativedelta(years=default_node_duration, seconds=-1) if end else None
900
+ )
901
+
902
+ valid = isinstance(start, datetime) and isinstance(end, datetime)
903
+ return DatetimeRange(start, end) if valid else None
904
+
905
+
906
+ def _get_node_datetime_range_from_dates(
907
+ node: dict, **_
908
+ ) -> Union[DatetimeRange, None]:
909
+ """
910
+ Get the datetime range from a node's "dates" field.
911
+ """
912
+ dates = node.get("dates")
913
+ end = max(
914
+ non_empty_list(
915
+ safe_parse_date(_gapfill_datestr(datestr, DatestrGapfillMode.END)) for datestr in dates
916
+ ), default=None
917
+ )
918
+ start = min(
919
+ non_empty_list(
920
+ safe_parse_date(_gapfill_datestr(datestr, DatestrGapfillMode.START)) for datestr in dates
921
+ ), default=None
922
+ )
923
+
924
+ valid = isinstance(start, datetime) and isinstance(end, datetime)
925
+ return DatetimeRange(start, end) if valid else None
926
+
927
+
928
+ GROUP_NODES_BY_YEAR_MODE_TO_GET_DATETIME_RANGE_FUNCTION = {
929
+ GroupNodesByYearMode.START_AND_END_DATE: _get_node_datetime_range_from_start_and_end_date,
930
+ GroupNodesByYearMode.DATES: _get_node_datetime_range_from_dates
931
+ }
932
+
933
+
934
+ def _build_time_fraction_dict(
935
+ group_datetime_range: DatetimeRange,
936
+ node_datetime_range: DatetimeRange
937
+ ) -> dict:
938
+ """
939
+ Build a dictionary containing fractions of the year and node duration based on datetime ranges.
940
+
941
+ This function calculates the duration of the group or year, the duration of the node, and the intersection
942
+ duration between the two. It then computes the fractions of the year and node duration represented by the
943
+ intersection. The results are returned in a dictionary.
944
+
945
+ Parameters
946
+ ----------
947
+ group_datetime_range : DatetimeRange
948
+ The datetime range representing the entire group or year.
949
+ node_datetime_range : DatetimeRange
950
+ The datetime range representing the node.
951
+
952
+ Returns
953
+ -------
954
+ dict
955
+ A dictionary containing "fraction_of_group_duration" and "fraction_of_node_duration".
956
+ """
957
+ group_duration = _datetime_range_duration(group_datetime_range, add_second=True)
958
+ node_duration = _datetime_range_duration(node_datetime_range, add_second=True)
959
+
960
+ intersection_duration = _calc_datetime_range_intersection_duration(
961
+ node_datetime_range, group_datetime_range, add_second=True
962
+ )
963
+
964
+ fraction_of_group_duration = intersection_duration / group_duration
965
+ fraction_of_node_duration = intersection_duration / node_duration
966
+
967
+ return {
968
+ "fraction_of_group_duration": fraction_of_group_duration,
969
+ "fraction_of_node_duration": fraction_of_node_duration
970
+ }
971
+
972
+
973
+ def _validate_time_fraction_dict(
974
+ time_fraction_dict: dict,
975
+ is_final_group: bool
806
976
  ) -> bool:
807
977
  """
808
978
  Return `True` if the node intersects with a year group by
@@ -814,24 +984,54 @@ def _validate_intersection_threshold(
814
984
  be counted in the year group if the majority of that node takes place in
815
985
  that year.
816
986
  """
817
- FRACTION_OF_YEAR_THRESHOLD = 0.3
987
+ FRACTION_OF_GROUP_DURATION_THRESHOLD = 0.3
818
988
  FRACTION_OF_NODE_DURATION_THRESHOLD = 0.5
819
989
 
820
- return (
821
- fraction_of_year > FRACTION_OF_YEAR_THRESHOLD
822
- or fraction_of_node_duration > FRACTION_OF_NODE_DURATION_THRESHOLD
823
- or (is_final_year and fraction_of_node_duration == FRACTION_OF_NODE_DURATION_THRESHOLD)
824
- )
990
+ return any([
991
+ time_fraction_dict["fraction_of_group_duration"] > FRACTION_OF_GROUP_DURATION_THRESHOLD,
992
+ time_fraction_dict["fraction_of_node_duration"] > FRACTION_OF_NODE_DURATION_THRESHOLD,
993
+ is_final_group and time_fraction_dict["fraction_of_node_duration"] == FRACTION_OF_NODE_DURATION_THRESHOLD
994
+ ])
995
+
996
+
997
+ def _build_update_dict(node: dict, years: list, target_year: int) -> dict:
998
+ """
999
+ Build an update dictionary containing values and dates from a node that fall within a given year.
1000
+
1001
+ This is only required when `group_nodes_by_year` is run with `mode = GroupNodesByYearMode.DATES`.
1002
+
1003
+ Parameters
1004
+ ----------
1005
+ node : dict
1006
+ The node containing values and dates.
1007
+ target_year : int
1008
+ The year to be matched.
1009
+
1010
+ Returns
1011
+ -------
1012
+ dict
1013
+ An update dictionary containing "value" and "dates" keys.
1014
+ """
1015
+ valid_indices = {
1016
+ i for i, y in enumerate(years) if y == target_year
1017
+ }
1018
+ return {
1019
+ "value": [node.get("value")[i] for i in valid_indices],
1020
+ "dates": [node.get("dates")[i] for i in valid_indices]
1021
+ }
825
1022
 
826
1023
 
827
1024
  def group_nodes_by_year(
828
1025
  nodes: list[dict],
829
1026
  default_node_duration: int = 1,
830
- sort_result: bool = True
1027
+ sort_result: bool = True,
1028
+ inner_key: Union[Any, None] = None,
1029
+ mode: GroupNodesByYearMode = GroupNodesByYearMode.START_AND_END_DATE
831
1030
  ) -> dict[int, list[dict]]:
832
1031
  """
833
- Group nodes by year based on their start and end dates. Incomplete date strings are gap-filled automatically
834
- using `_gapfill_datestr` function.
1032
+ Group nodes by year based on either their "startDate" and "endDate" fields or their
1033
+ "dates" field. Incomplete date strings are gap-filled automatically using `_gapfill_datestr`
1034
+ function.
835
1035
 
836
1036
  Parameters
837
1037
  ----------
@@ -841,32 +1041,38 @@ def group_nodes_by_year(
841
1041
  Default duration of a node in years if the start date is not available, by default 1.
842
1042
  sort_result : bool, optional
843
1043
  Flag to sort the result by year, by default True.
1044
+ inner_key: Any | None
1045
+ An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
1046
+ dictionaries together), default value: `None`.
1047
+ mode : GroupNodesByYearMode, optional
1048
+ The mode to determine how nodes are grouped by year. Options are defined in `GroupNodesByYearMode`.
844
1049
 
845
1050
  Returns
846
1051
  -------
847
1052
  dict[int, list[dict]]
848
1053
  A dictionary where keys are years and values are lists of nodes.
849
1054
  """
850
- def group_node(groups: dict, index: int):
851
- node = nodes[index]
852
1055
 
853
- end_datestr = _gapfill_datestr(node.get("endDate"), DatestrGapfillMode.END)
854
- start_datestr = _gapfill_datestr(node.get("startDate"), DatestrGapfillMode.START)
1056
+ should_run_node = GROUP_NODES_BY_YEAR_MODE_TO_SHOULD_RUN_NODE_FUNCTION[mode]
1057
+ get_node_datetime_range = GROUP_NODES_BY_YEAR_MODE_TO_GET_DATETIME_RANGE_FUNCTION[mode]
1058
+
1059
+ valid_nodes = [node for node in nodes if should_run_node(node)]
855
1060
 
856
- end = safe_parse_date(end_datestr)
857
- start = (
858
- safe_parse_date(start_datestr)
859
- or end - relativedelta(years=default_node_duration, seconds=-1)
860
- if bool(end) else None
1061
+ def group_node(groups: dict, index: int):
1062
+ node = valid_nodes[index]
1063
+
1064
+ node_datetime_range = get_node_datetime_range(
1065
+ node, default_node_duration=default_node_duration
861
1066
  )
862
1067
 
863
- node_datetime_range = DatetimeRange(
864
- start=start,
865
- end=end
1068
+ # pre-parse the "dates" field so it doesn't get re-calculated in each iteration of the for-loop
1069
+ years = (
1070
+ [safe_parse_date(datestr).year for datestr in node.get("dates", [])]
1071
+ if mode == GroupNodesByYearMode.DATES else []
866
1072
  )
867
1073
 
868
- range_end = end.year + 1 if end else 0
869
- range_start = start.year if start else 0
1074
+ range_start = node_datetime_range.start.year if node_datetime_range else 0
1075
+ range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
870
1076
 
871
1077
  for year in range(range_start, range_end):
872
1078
 
@@ -877,32 +1083,148 @@ def group_nodes_by_year(
877
1083
 
878
1084
  is_final_year = _datetime_within_range(node_datetime_range.end, group_datetime_range)
879
1085
 
880
- # add 1 to durations if datestrs gap filled (to account for 1 second between 23:59:59 and 00:00:00)
881
- year_duration = _datetime_range_duration(group_datetime_range) + 1
882
- node_duration = _datetime_range_duration(node_datetime_range) + 1
883
- intersection_duration = (
884
- _calc_datetime_range_intersection_duration(node_datetime_range, group_datetime_range) + 1
1086
+ time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
1087
+ update_dict = _build_update_dict(node, years, year) if mode == GroupNodesByYearMode.DATES else {}
1088
+
1089
+ should_run = (
1090
+ mode == GroupNodesByYearMode.DATES
1091
+ or _validate_time_fraction_dict(
1092
+ time_fraction_dict,
1093
+ is_final_year
1094
+ )
885
1095
  )
886
1096
 
887
- fraction_of_year = intersection_duration / year_duration
888
- fraction_of_node_duration = intersection_duration / node_duration
1097
+ should_run and groups[year].append(
1098
+ node | time_fraction_dict | update_dict
1099
+ )
889
1100
 
890
- time_fraction_dict = {
891
- "fraction_of_year": fraction_of_year,
892
- "fraction_of_node_duration": fraction_of_node_duration
893
- }
1101
+ return groups
894
1102
 
895
- _node = node | time_fraction_dict
1103
+ grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(list))
896
1104
 
897
- should_run = _validate_intersection_threshold(
898
- fraction_of_year,
899
- fraction_of_node_duration,
900
- is_final_year
901
- )
1105
+ iterated = {
1106
+ year: {inner_key: group} if inner_key else group
1107
+ for year, group in grouped.items()
1108
+ }
1109
+
1110
+ return dict(sorted(iterated.items())) if sort_result else iterated
1111
+
1112
+
1113
+ def group_nodes_by_year_and_month(
1114
+ nodes: list[dict],
1115
+ default_node_duration: int = 1,
1116
+ sort_result: bool = True,
1117
+ inner_key: Union[Any, None] = None
1118
+ ) -> dict[int, list[dict]]:
1119
+ """
1120
+ Group nodes by year and month based on their "startDate" and "endDate" fields. Incomplete date strings are gap-filled
1121
+ automatically using `_gapfill_datestr` function.
1122
+
1123
+ Returns a dict in the shape:
1124
+ ```
1125
+ {
1126
+ year (int): {
1127
+ month (int): nodes (list[dict]) # for each month 1 - 12
1128
+ }
1129
+ }
1130
+ ```
1131
+
1132
+ Parameters
1133
+ ----------
1134
+ nodes : list[dict]
1135
+ A list of nodes with start and end date information.
1136
+ default_node_duration : int, optional
1137
+ Default duration of a node in years if the start date is not available, by default 1.
1138
+ sort_result : bool, optional
1139
+ Flag to sort the result by year, by default True.
1140
+ inner_key: Any | None
1141
+ An optional inner dictionary key for the outputted annualised groups (can be used to merge annualised
1142
+ dictionaries together), default value: `None`.
1143
+
1144
+ Returns
1145
+ -------
1146
+ dict[int, list[dict]]
1147
+ A dictionary where keys are years and values are dictionaries mapping months to lists of nodes.
1148
+ """
1149
+ valid_nodes = [node for node in nodes if _should_run_node_by_end_date(node)]
1150
+
1151
+ def group_node(groups: dict, index: int):
1152
+ node = valid_nodes[index]
1153
+
1154
+ node_datetime_range = _get_node_datetime_range_from_start_and_end_date(
1155
+ node, default_node_duration=default_node_duration
1156
+ )
1157
+
1158
+ range_start = node_datetime_range.start.year if node_datetime_range else 0
1159
+ range_end = node_datetime_range.end.year + 1 if node_datetime_range else 0
1160
+
1161
+ for year in range(range_start, range_end):
1162
+ for month in range(1, 13):
902
1163
 
903
- should_run and groups[year].append(_node)
1164
+ group_datetime_range = DatetimeRange(
1165
+ start=safe_parse_date(_gapfill_datestr(f"{year}-{month:02}", DatestrGapfillMode.START)),
1166
+ end=safe_parse_date(_gapfill_datestr(f"{year}-{month}", DatestrGapfillMode.END))
1167
+ )
1168
+
1169
+ is_final_month = _datetime_within_range(node_datetime_range.end, group_datetime_range)
1170
+ time_fraction_dict = _build_time_fraction_dict(group_datetime_range, node_datetime_range)
1171
+ should_run = _validate_time_fraction_dict(time_fraction_dict, is_final_month)
1172
+
1173
+ should_run and groups[year][month].append(node)
904
1174
 
905
1175
  return groups
906
1176
 
907
- grouped = reduce(group_node, range(len(nodes)), defaultdict(list))
908
- return dict(sorted(grouped.items())) if sort_result else grouped
1177
+ grouped = reduce(group_node, range(len(valid_nodes)), defaultdict(lambda: defaultdict(list)))
1178
+
1179
+ iterated = {
1180
+ year: {inner_key: dict(group)} if inner_key else dict(group)
1181
+ for year, group in grouped.items()
1182
+ }
1183
+
1184
+ return dict(sorted(iterated.items())) if sort_result else iterated
1185
+
1186
+
1187
+ # --- Group nodes by last date ---
1188
+
1189
+
1190
+ def _get_last_date(datestrs: list[str]) -> Optional[str]:
1191
+ """
1192
+ Reduce a list of datestrs down to a single datestr by selecting the last one.
1193
+
1194
+ Parameters
1195
+ ----------
1196
+ datestrs : list
1197
+ A list of datestrings, e.g. the value of a node's `dates` field.
1198
+
1199
+ Returns
1200
+ -------
1201
+ str | None
1202
+ Returns the latest datestr or `None` if no valid datestr in list.
1203
+
1204
+ """
1205
+ return sorted(datestrs)[-1] if len(datestrs) > 0 else None
1206
+
1207
+
1208
+ def group_nodes_by_last_date(nodes: list) -> dict[str, list[dict]]:
1209
+ """
1210
+ Group a list of nodes by the last date of their `dates` field. Nodes with no `dates` field will be sorted into
1211
+ the `no-dates` group.
1212
+
1213
+ Parameters
1214
+ ----------
1215
+ nodes : list[dict]
1216
+ A list of Hestia format nodes.
1217
+
1218
+ Return
1219
+ ------
1220
+ dict
1221
+ A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
1222
+ """
1223
+ DEFAULT_KEY = 'no-dates'
1224
+
1225
+ def group_by(group: dict, node: dict):
1226
+ dates = node.get('dates', [])
1227
+ key = _get_last_date(dates) or DEFAULT_KEY
1228
+ return group | {key: group.get(key, []) + [node]}
1229
+
1230
+ return reduce(group_by, nodes, {})
@@ -0,0 +1,13 @@
1
+ PRODUCT_ID_TO_PRACTICES_ID = [
2
+ {'product': 'aboveGroundCropResidueRemoved', 'practices': ['residueRemoved']},
3
+ {'product': 'aboveGroundCropResidueIncorporated', 'practices': [
4
+ 'residueIncorporated',
5
+ 'residueIncorporatedLessThan30DaysBeforeCultivation',
6
+ 'residueIncorporatedMoreThan30DaysBeforeCultivation'
7
+ ]},
8
+ {'product': 'aboveGroundCropResidueBurnt', 'practices': ['residueBurnt']},
9
+ {'product': 'aboveGroundCropResidueLeftOnField', 'practices': ['residueLeftOnField']}
10
+ ]
11
+
12
+
13
+ def crop_residue_product_ids(): return [v.get('product') for v in PRODUCT_ID_TO_PRACTICES_ID]