hestia-earth-models 0.64.4__py3-none-any.whl → 0.64.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of hestia-earth-models has been flagged as potentially problematic.

Files changed (62)
  1. hestia_earth/models/blonkConsultants2016/ch4ToAirNaturalVegetationBurning.py +5 -9
  2. hestia_earth/models/blonkConsultants2016/co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +5 -9
  3. hestia_earth/models/blonkConsultants2016/n2OToAirNaturalVegetationBurningDirect.py +6 -13
  4. hestia_earth/models/cycle/animal/input/properties.py +6 -0
  5. hestia_earth/models/cycle/completeness/soilAmendment.py +3 -2
  6. hestia_earth/models/cycle/concentrateFeed.py +10 -4
  7. hestia_earth/models/cycle/input/properties.py +6 -0
  8. hestia_earth/models/cycle/liveAnimal.py +2 -2
  9. hestia_earth/models/cycle/milkYield.py +3 -3
  10. hestia_earth/models/cycle/otherSitesArea.py +59 -0
  11. hestia_earth/models/cycle/otherSitesUnusedDuration.py +9 -8
  12. hestia_earth/models/cycle/pastureSystem.py +3 -2
  13. hestia_earth/models/cycle/product/properties.py +6 -0
  14. hestia_earth/models/cycle/siteArea.py +83 -0
  15. hestia_earth/models/cycle/stockingDensityAnimalHousingAverage.py +28 -16
  16. hestia_earth/models/cycle/utils.py +1 -1
  17. hestia_earth/models/environmentalFootprintV3/soilQualityIndexLandOccupation.py +128 -0
  18. hestia_earth/models/environmentalFootprintV3/utils.py +17 -0
  19. hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +17 -6
  20. hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +17 -6
  21. hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +904 -0
  22. hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +70 -618
  23. hestia_earth/models/mocking/search-results.json +395 -323
  24. hestia_earth/models/pooreNemecek2018/saplings.py +10 -7
  25. hestia_earth/models/site/management.py +18 -14
  26. hestia_earth/models/utils/__init__.py +38 -0
  27. hestia_earth/models/utils/array_builders.py +63 -52
  28. hestia_earth/models/utils/blank_node.py +137 -82
  29. hestia_earth/models/utils/descriptive_stats.py +3 -239
  30. hestia_earth/models/utils/feedipedia.py +15 -2
  31. hestia_earth/models/utils/landCover.py +9 -0
  32. hestia_earth/models/utils/lookup.py +13 -2
  33. hestia_earth/models/utils/measurement.py +3 -28
  34. hestia_earth/models/utils/stats.py +429 -0
  35. hestia_earth/models/utils/term.py +15 -3
  36. hestia_earth/models/utils/time_series.py +90 -0
  37. hestia_earth/models/version.py +1 -1
  38. {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.5.dist-info}/METADATA +1 -1
  39. {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.5.dist-info}/RECORD +62 -48
  40. tests/models/blonkConsultants2016/test_ch4ToAirNaturalVegetationBurning.py +2 -2
  41. tests/models/blonkConsultants2016/test_co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +2 -2
  42. tests/models/blonkConsultants2016/test_n2OToAirNaturalVegetationBurningDirect.py +2 -2
  43. tests/models/cycle/completeness/test_soilAmendment.py +1 -1
  44. tests/models/cycle/test_liveAnimal.py +1 -1
  45. tests/models/cycle/test_milkYield.py +1 -1
  46. tests/models/cycle/test_otherSitesArea.py +68 -0
  47. tests/models/cycle/test_siteArea.py +51 -0
  48. tests/models/cycle/test_stockingDensityAnimalHousingAverage.py +2 -2
  49. tests/models/environmentalFootprintV3/test_soilQualityIndexLandOccupation.py +136 -0
  50. tests/models/ipcc2019/test_co2ToAirCarbonStockChange_utils.py +50 -0
  51. tests/models/ipcc2019/test_co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +1 -39
  52. tests/models/pooreNemecek2018/test_saplings.py +1 -1
  53. tests/models/site/test_management.py +3 -153
  54. tests/models/utils/test_array_builders.py +67 -6
  55. tests/models/utils/test_blank_node.py +191 -7
  56. tests/models/utils/test_descriptive_stats.py +2 -86
  57. tests/models/utils/test_measurement.py +1 -22
  58. tests/models/utils/test_stats.py +186 -0
  59. tests/models/utils/test_time_series.py +88 -0
  60. {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.5.dist-info}/LICENSE +0 -0
  61. {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.5.dist-info}/WHEEL +0 -0
  62. {hestia_earth_models-0.64.4.dist-info → hestia_earth_models-0.64.5.dist-info}/top_level.txt +0 -0

hestia_earth/models/pooreNemecek2018/saplings.py

@@ -11,6 +11,7 @@ from .plantationLifespan import TERM_ID as PRACTICE_TERM_ID
 REQUIREMENTS = {
     "Cycle": {
         "completeness.other": "False",
+        "cycleDuration": "> 0",
         "products": [{"@type": "Product", "value": "", "term.termType": "crop"}],
         "practices": [{"@type": "Practice", "value": "", "term.@id": "plantationLifespan"}]
     }
@@ -37,15 +38,16 @@ def _get_value(product: dict):
     return safe_parse_float(get_crop_lookup_value(MODEL, TERM_ID, term_id, LOOKUPS['crop']), None)


-def _run(product: dict, plantation_duration: float):
+def _run(product: dict, plantation_duration: float, cycleDuration: float):
     value = _get_value(product)
-    return [_input(value / plantation_duration)]
+    return [_input(value / plantation_duration * cycleDuration)]


 def _should_run_product(product: dict): return _get_value(product) is not None


 def _should_run(cycle: dict):
+    cycleDuration = cycle.get('cycleDuration')
     term_type_incomplete = _is_term_type_incomplete(cycle, TERM_ID)
     product = next((p for p in cycle.get('products', []) if _should_run_product(p)), None)
     plantation_duration = list_sum(find_term_match(cycle.get('practices', []), PRACTICE_TERM_ID).get('value'), None)
@@ -53,13 +55,14 @@ def _should_run(cycle: dict):
     logRequirements(cycle, model=MODEL, term=TERM_ID,
                     term_type_seed_incomplete=term_type_incomplete,
                     product_id=(product or {}).get('term', {}).get('@id'),
-                    plantation_duration=plantation_duration)
+                    plantation_duration=plantation_duration,
+                    cycleDuration=cycleDuration)

-    should_run = all([term_type_incomplete, product, plantation_duration])
+    should_run = all([term_type_incomplete, product, plantation_duration, (cycleDuration or 0) > 0])
     logShouldRun(cycle, MODEL, TERM_ID, should_run)
-    return should_run, product, plantation_duration
+    return should_run, product, plantation_duration, cycleDuration


 def run(cycle: dict):
-    should_run, product, plantation_duration = _should_run(cycle)
-    return _run(product, plantation_duration) if should_run else []
+    should_run, product, plantation_duration, cycleDuration = _should_run(cycle)
+    return _run(product, plantation_duration, cycleDuration) if should_run else []
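
Note on this change: in 0.64.4 the saplings input was the lookup value amortised per unit of plantation lifespan; 0.64.5 scales that share by `cycleDuration`, so the returned value covers the whole cycle (and the model now refuses to run without a positive `cycleDuration`). A worked example with hypothetical numbers, assuming both durations use the same time unit:

    # Hypothetical numbers only; assumes plantationLifespan and cycleDuration
    # are expressed in the same time unit.
    lookup_value = 1600          # saplings required over the plantation's life
    plantation_duration = 7300   # plantationLifespan practice value
    cycle_duration = 365         # Cycle.cycleDuration

    old = lookup_value / plantation_duration                   # 0.64.4 behaviour
    new = lookup_value / plantation_duration * cycle_duration  # 0.64.5 behaviour
    print(round(old, 4), round(new, 1))                        # 0.2192 80.0
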
hestia_earth/models/site/management.py

@@ -160,16 +160,18 @@ def _get_landCover_term_id(product: dict) -> str:


 def _get_relevant_items(
-    cycles: list[dict], item_name: str, relevant_terms: list, date_fill: callable = _default_dates
-) -> list:
+    cycles: list[dict], item_name: str, relevant_terms: list, date_fill: callable = _default_dates
+):
     """
     Get items from the list of cycles with any of the relevant terms.
     Also adds dates if missing.
     """
     return [
-        item
+        [
+            item
+            for item in date_fill(cycle=cycle, values=filter_list_term_type(cycle.get(item_name, []), relevant_terms))
+        ]
         for cycle in cycles
-        for item in date_fill(cycle=cycle, values=filter_list_term_type(cycle.get(item_name, []), relevant_terms))
     ]


@@ -230,18 +232,19 @@ def _has_gap_fill_to_management_set(practices: list) -> list:


 def _should_run_all_products(cycles: list, site_type: str):
+    products_land_cover = flatten(_get_relevant_items(
+        cycles=cycles,
+        item_name="products",
+        relevant_terms=[TermTermType.LANDCOVER]
+    )) if site_type else []
     products_land_cover = [
         _extract_node_value(
             _include(
                 value=product,
                 keys=["term", "value", "startDate", "endDate", "properties"]
             )
-        ) for product in _get_relevant_items(
-            cycles=cycles,
-            item_name="products",
-            relevant_terms=[TermTermType.LANDCOVER]
-        )
-    ] if site_type else []
+        ) for product in products_land_cover
+    ]

     products_crop_forage = _get_relevant_items(
         cycles=cycles,
@@ -255,10 +258,11 @@ def _should_run_all_products(cycles: list, site_type: str):
                 keys=["startDate", "endDate", "properties"],
                 dest={
                     "term": linked_node(download_hestia(_get_landCover_term_id(product))),
-                    "value": 100
+                    "value": round(100 / len(_products), 2)
                 }
             )
-        for product in list(filter(_get_landCover_term_id, [i for i in products_crop_forage]))
+        for _products in products_crop_forage
+        for product in list(filter(_get_landCover_term_id, _products))
     ] if site_type else []
     dates = sorted(list(set(
         non_empty_list(flatten([[cycle.get('startDate'), cycle.get('endDate')] for cycle in cycles]))
@@ -293,7 +297,7 @@ def _should_run(site: dict):
                 value=practice,
                 keys=["term", "value", "startDate", "endDate"]
             )
-        ) for practice in _get_relevant_items(
+        ) for practice in flatten(_get_relevant_items(
             cycles=cycles,
             item_name="practices",
             relevant_terms=[
@@ -303,7 +307,7 @@
                 TermTermType.LANDUSEMANAGEMENT,
                 TermTermType.SYSTEM
             ]
-        )
+        ))
     ]
     practices = _has_gap_fill_to_management_set(practices)
     practices = condense_nodes(practices)
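
Taken together, these hunks change `_get_relevant_items` to return one sub-list per cycle, and change the derived landCover value from a flat `100` to an even split across each cycle's matching products. A minimal sketch of the new split (term ids are hypothetical):

    # Hypothetical grouping: cycle 1 has two crop/forage products, cycle 2 has one.
    products_crop_forage = [['wheatGrain', 'barleyGrain'], ['maizeGrain']]

    values = [
        (product, round(100 / len(_products), 2))
        for _products in products_crop_forage
        for product in _products
    ]
    print(values)  # [('wheatGrain', 50.0), ('barleyGrain', 50.0), ('maizeGrain', 100.0)]
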
hestia_earth/models/utils/__init__.py

@@ -1,6 +1,8 @@
 from os.path import dirname, abspath
 from collections.abc import Generator, Iterable
 from itertools import tee
+from decimal import Decimal
+from statistics import mean
 import sys
 import datetime
 from functools import reduce
@@ -10,6 +12,7 @@ from hestia_earth.schema import SchemaType
 from hestia_earth.utils.api import download_hestia
 from hestia_earth.utils.model import linked_node
 from hestia_earth.utils.tools import flatten, non_empty_list
+from hestia_earth.utils.date import is_in_days, is_in_months

 from .constant import Units

@@ -94,6 +97,23 @@ def multiply_values(values: list):
     return reduce(operator.mul, filtered_values, 1) if len(filtered_values) > 1 else None


+def _numeric_weighted_average(values: list):
+    total_weight = sum(Decimal(str(weight)) for _v, weight in values) if values else Decimal(0)
+    weighted_values = [Decimal(str(value)) * Decimal(str(weight)) for value, weight in values]
+    average = sum(weighted_values) / (total_weight if total_weight else 1) if weighted_values else None
+    return None if average is None else float(average)
+
+
+def _bool_weighted_average(values: list):
+    return mean(map(int, values)) >= 0.5
+
+
+def weighted_average(weighted_values: list):
+    values = [v for v, _w in weighted_values]
+    all_boolean = all([isinstance(v, bool) for v in values])
+    return _bool_weighted_average(values) if all_boolean else _numeric_weighted_average(weighted_values)
+
+
 def term_id_prefix(term_id: str): return term_id.split('Kg')[0]

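
How the new helper behaves, in a self-contained sketch (the body is condensed from the hunk above; the sample pairs are illustrative). Note that for all-boolean values the weights are ignored and a simple majority vote is taken:

    from decimal import Decimal
    from statistics import mean

    def weighted_average(weighted_values):
        # condensed copy of the helpers above, for illustration
        values = [v for v, _w in weighted_values]
        if all(isinstance(v, bool) for v in values):
            return mean(map(int, values)) >= 0.5  # weights ignored: majority vote
        total = sum(Decimal(str(w)) for _v, w in weighted_values)
        return float(sum(Decimal(str(v)) * Decimal(str(w)) for v, w in weighted_values) / (total or 1))

    print(weighted_average([(10, 1), (20, 3)]))                   # 17.5
    print(weighted_average([(True, 2), (False, 1), (False, 1)]))  # False
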
@@ -156,3 +176,21 @@ def pairwise(iterable):
     a, b = tee(iterable)
     next(b, None)
     return zip(a, b)
+
+
+def full_date_str(date_str: str, is_end: bool = False):
+    """
+    Return the date in format YYY-MM-dd, by setting the month and day if they are not provided.
+    """
+    return date_str if is_in_days(date_str) else (
+        f"{date_str}-{14 if is_end else 15}" if is_in_months(date_str)
+        else f"{date_str}-{'12-31' if is_end else '01-01'}"
+    )
+
+
+def days_to_years(days):
+    return days / 365
+
+
+def hectar_to_square_meter(value):
+    return value * 10000
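
Usage of the new date helper above (import path per this file, hestia_earth/models/utils/__init__.py). Year-only strings are padded to the first/last day of the year; month-only strings are padded to mid-month (the 15th, or the 14th for an end date):

    from hestia_earth.models.utils import full_date_str, days_to_years, hectar_to_square_meter

    print(full_date_str('2020'))                  # 2020-01-01
    print(full_date_str('2020', is_end=True))     # 2020-12-31
    print(full_date_str('2020-06'))               # 2020-06-15
    print(full_date_str('2020-06', is_end=True))  # 2020-06-14
    print(days_to_years(730))                     # 2.0
    print(hectar_to_square_meter(1.5))            # 15000.0
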
hestia_earth/models/utils/array_builders.py

@@ -3,11 +3,11 @@ Based on code by Cool Farm Tool:
 https://gitlab.com/MethodsCFT/coolfarm-soc/-/blob/main/src/cfasoc/builders.py
 """
 import hashlib
-from numpy import array, concatenate, cumsum, full, hstack, random, mean, prod, vstack
+from numpy import cumsum, dot, full, linalg, hstack, random, mean, vstack
 from numpy.typing import NDArray, DTypeLike
 from typing import Union

-from .descriptive_stats import calc_z_critical
+from .stats import calc_z_critical, truncnorm_rvs


 def repeat_single(shape: tuple, value: float, dtype: DTypeLike = None) -> NDArray:
@@ -31,7 +31,7 @@ def repeat_single(shape: tuple, value: float, dtype: DTypeLike = None) -> NDArra
     return full(shape=shape, fill_value=value, dtype=dtype)


-def repeat_array_as_columns(n_iterations: int, array: NDArray) -> NDArray:
+def repeat_array_as_columns(n_iterations: int, arr: NDArray) -> NDArray:
     """
     Repeat a numpy array horizontally as columns.

@@ -39,7 +39,7 @@ def repeat_array_as_columns(n_iterations: int, array: NDArray) -> NDArray:
     ----------
     n_iterations : int
         Number of times the columns should be repeated.
-    array : NDArray
+    arr : NDArray
         Array to repeat.

     Returns
@@ -47,10 +47,10 @@ def repeat_array_as_columns(n_iterations: int, array: NDArray) -> NDArray:
     NDArray
         Repeated array.
     """
-    return hstack([array for _ in range(n_iterations)])
+    return hstack([arr for _ in range(n_iterations)])


-def repeat_array_as_rows(n_iterations: int, array: NDArray) -> NDArray:
+def repeat_array_as_rows(n_iterations: int, arr: NDArray) -> NDArray:
     """
     Repeat a numpy array vertically as rows.

@@ -58,7 +58,7 @@ def repeat_array_as_rows(n_iterations: int, array: NDArray) -> NDArray:
     ----------
     n_iterations : int
         Number of times the rows should be repeated.
-    array : NDArray
+    arr : NDArray
         Array to repeat.

     Returns
@@ -66,7 +66,7 @@ def repeat_array_as_rows(n_iterations: int, array: NDArray) -> NDArray:
     NDArray
         Repeated array.
     """
-    return vstack([array for _ in range(n_iterations)])
+    return vstack([arr for _ in range(n_iterations)])


 def repeat_1d_array_as_columns(n_columns: int, column: NDArray) -> NDArray:
@@ -307,7 +307,7 @@ def truncated_normal_1d(
     n_rows, n_columns = shape
     return repeat_array_as_rows(
         n_rows,
-        _truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
+        truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=n_columns, seed=seed)
     )


@@ -339,49 +339,7 @@ def truncated_normal_2d(
     NDArray
         Array of samples with 2 dimensional variability.
     """
-    return _truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=shape, seed=seed)
-
-
-def _truncnorm_rvs(
-    a: float,
-    b: float,
-    loc: float,
-    scale: float,
-    shape: Union[int, tuple[int, ...]],
-    seed: Union[int, random.Generator, None] = None
-) -> NDArray:
-    """
-    Generate random samples from a truncated normal distribution. Unlike the `scipy` equivalent, the `a` and `b` values
-    are the abscissae at which we wish to truncate the distribution (as opposed to the number of standard deviations
-    from `loc`).
-
-    Parameters
-    ----------
-    loc : float
-        Mean ("centre") of the distribution.
-    scale : float
-        Standard deviation (spread or "width") of the distribution. Must be non-negative.
-    size : int | tuple[int, ...]
-        Output shape. If the given shape is, e.g., (m, n, k), then m * n * k samples are drawn.
-    seed : int | Generator | None, optional
-        A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
-        fresh, unpredictable entropy will be pulled from the OS.
-
-    Returns
-    -------
-    NDArray
-        Array of samples.
-    """
-    size = prod(shape)
-    samples = array([])
-    rng = random.default_rng(seed)
-
-    while samples.size < size:
-        samples_temp = rng.normal(loc, scale, (size - samples.size) * 2)
-        valid_samples = samples_temp[(a <= samples_temp) & (samples_temp <= b)]
-        samples = concatenate([samples, valid_samples])
-
-    return samples[:size].reshape(shape)
+    return truncnorm_rvs(a=low, b=high, loc=mu, scale=sigma, shape=shape, seed=seed)


 def plus_minus_uncertainty_to_normal_1d(
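
`_truncnorm_rvs` itself has not disappeared: its rejection-sampling body (draw normal samples, keep only those inside `[a, b]`, repeat until enough survive) moved to the new utils/stats.py module as `truncnorm_rvs`, which both `truncated_normal_1d` and `truncated_normal_2d` now call. A usage sketch against the signature shown above (values are arbitrary):

    from hestia_earth.models.utils.stats import truncnorm_rvs

    # 4 x 3 samples from N(5, 2) truncated to the interval [0, 10]
    samples = truncnorm_rvs(a=0, b=10, loc=5, scale=2, shape=(4, 3), seed=42)
    assert samples.shape == (4, 3)
    assert ((samples >= 0) & (samples <= 10)).all()
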
@@ -576,3 +534,56 @@ def gen_seed(node: dict) -> int:
     node_id = node.get("@id", "")
     hashed = hashlib.shake_128(node_id.encode(), usedforsecurity=False).hexdigest(4)
     return abs(int(hashed, 16))
+
+
+def correlated_normal_2d(
+    n_iterations: int,
+    means: NDArray,
+    sds: NDArray,
+    correlation_matrix: NDArray,
+    seed: Union[int, random.Generator, None] = None,
+) -> NDArray:
+    """
+    Generate correlated random samples from a multivariate normal distribution with specified means, standard
+    deviations, and a correlation matrix. Each row represents a different variable (e.g., different years), and each
+    column represents a different iteration (sample).
+
+    Parameters
+    ----------
+    n_iterations : int
+        The number of samples (iterations) to generate for each variable.
+    means : NDArray
+        An array of mean values for each variable (row).
+    sds : NDArray
+        An array of standard deviations for each variable (row).
+    correlation_matrix : NDArray
+        A positive-definite matrix representing the correlations between the variables (rows).
+    seed : int | Generator | None, optional
+        A seed to initialize the BitGenerator. If passed a Generator, it will be returned unaltered. If `None`, then
+        fresh, unpredictable entropy will be pulled from the OS.
+
+    Returns
+    -------
+    NDArray
+        A 2D array of shape (len(means), n_iterations), where each row corresponds to a different variable and each
+        column corresponds to a sample iteration. The values in each row are correlated according to the provided
+        correlation matrix.
+    """
+    # Generate independent random samples for each year
+    shape = (len(means), n_iterations)
+    independent_samples = normal_2d(shape, 0, 1, seed=seed)
+
+    # Apply Cholesky decomposition to the correlation matrix
+    cholesky_decomp = linalg.cholesky(correlation_matrix)
+
+    # Apply Cholesky transformation to introduce correlation across years (rows) for each sample
+    correlated_samples = dot(cholesky_decomp, independent_samples)
+
+    # Scale by standard deviations and shift by means
+    scaled_samples = (
+        correlated_samples
+        * repeat_1d_array_as_columns(n_iterations, sds)
+        + repeat_1d_array_as_columns(n_iterations, means)
+    )
+
+    return scaled_samples
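
The construction in `correlated_normal_2d` is the standard one: if `C = L Lᵀ` is the Cholesky factorisation of the correlation matrix and `Z` holds i.i.d. standard normal draws, then the rows of `L @ Z` are correlated according to `C`; scaling by the per-row standard deviations and adding the means restores the marginals. A small usage sketch (inputs are illustrative):

    import numpy as np
    from hestia_earth.models.utils.array_builders import correlated_normal_2d

    means = np.array([10.0, 12.0, 15.0])  # e.g. one value per year
    sds = np.array([1.0, 1.5, 2.0])
    corr = np.array([
        [1.0, 0.8, 0.5],
        [0.8, 1.0, 0.8],
        [0.5, 0.8, 1.0],
    ])  # must be positive-definite

    samples = correlated_normal_2d(10_000, means, sds, corr, seed=7)
    print(samples.shape)                  # (3, 10000)
    print(np.corrcoef(samples).round(1))  # close to corr
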
hestia_earth/models/utils/blank_node.py

@@ -1,11 +1,7 @@
-import calendar
 from calendar import monthrange
 from collections import defaultdict
 from collections.abc import Iterable
 from datetime import datetime, timedelta
-from uuid import uuid4
-
-from dateutil.relativedelta import relativedelta
 from enum import Enum
 from functools import reduce
 from typing import (
@@ -16,8 +12,12 @@ from typing import (
     Optional,
     Union
 )
+
+from dateutil import parser
+from dateutil.relativedelta import relativedelta
 from hestia_earth.schema import TermTermType
 from hestia_earth.utils.api import download_hestia
+from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value
 from hestia_earth.utils.model import filter_list_term_type
 from hestia_earth.utils.tools import (
     flatten,
@@ -26,19 +26,22 @@ from hestia_earth.utils.tools import (
     safe_parse_float,
     non_empty_list
 )
-from hestia_earth.utils.blank_node import ArrayTreatment, get_node_value

-from ..log import debugValues, log_as_table
-from . import is_from_model, _filter_list_term_unit, is_iterable, _omit
+from . import is_from_model, _filter_list_term_unit, is_iterable, full_date_str
 from .constant import Units
-from .property import get_node_property, get_node_property_value
 from .lookup import (
     is_model_siteType_allowed,
     is_siteType_allowed,
     is_product_id_allowed, is_product_termType_allowed,
     is_input_id_allowed, is_input_termType_allowed
 )
+from .property import get_node_property, get_node_property_value
 from .term import get_lookup_value
+from ..log import debugValues, log_as_table
+
+# TODO: verify those values
+MAX_DEPTH = 1000
+OLDEST_DATE = '1800'


 def merge_blank_nodes(source: list, new_values: list):
@@ -1228,7 +1231,11 @@ def get_inputs_from_properties(input: dict, term_types: Union[TermTermType, List
        A dictionary of nodes grouped by latest date, in the format `{date: list[node]}`.
     """
     input_value = list_sum(input.get('value', []))
-    properties = input.get('properties') or download_hestia(input.get('term', {}).get('@id')).get('defaultProperties')
+    properties = (
+        input.get('properties') or
+        input.get('term', {}).get('defaultProperties') or
+        download_hestia(input.get('term', {}).get('@id')).get('defaultProperties')
+    )
     inputs = non_empty_list([
         {
             'term': p.get('key'),
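
The new fallback chain reads: properties set on the input itself, then any `defaultProperties` already embedded in the term, and only then a `download_hestia` call — sparing an API round-trip when the term was stored with its defaults. Roughly (node and property values are hypothetical):

    # Hypothetical input whose term already embeds its default properties,
    # so download_hestia() is never reached.
    input_node = {
        'term': {
            '@id': 'wheatGrain',
            'defaultProperties': [{'term': {'@id': 'dryMatter'}, 'value': 87}]
        },
        'value': [1000]
    }
    properties = (
        input_node.get('properties') or                       # 1. explicit properties
        input_node.get('term', {}).get('defaultProperties')   # 2. embedded defaults (hit)
        # or download_hestia(...).get('defaultProperties')    # 3. last resort
    )
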
@@ -1238,97 +1245,145 @@ def get_inputs_from_properties(input: dict, term_types: Union[TermTermType, List
     return filter_list_term_type(inputs, term_types)


-def _get_condensed_nodes(nodes: list) -> tuple[list, bool]:
-    """Only considers nodes which already match on non-date criteria."""
-    CONDENSABLE_UNITS = [Units.BOOLEAN.value, Units.PERCENTAGE_AREA.value]
-    condensed_nodes = []
-    matched_uuids = set()
-    nodes_by_start_date = {_full_date_str(date_str=n["startDate"], is_end=True): n for n in nodes if "startDate" in n}
-    if len(nodes_by_start_date) != len(nodes):
-        return nodes, False
+def _should_group_node(node: dict): return node.get('startDate') and node.get('endDate')
+
+
+def _parse_date(node: dict, key: str):
+    return safe_parse_date(full_date_str(node.get(key), is_end=key == 'endDate'))
+

-    for node in nodes:
-        search_date = _offset_date(date_str=node.get("endDate", ""))
-        if node["uuid"] in matched_uuids:
-            continue
-        if (search_date in nodes_by_start_date and nodes_by_start_date[search_date]["uuid"] not in matched_uuids
-                and node.get("term", {}).get("units") in CONDENSABLE_UNITS):
-            new_node = node.copy()
-            new_node["endDate"] = nodes_by_start_date[search_date]["endDate"]
-            condensed_nodes.append(new_node)
-            matched_uuids.add(nodes_by_start_date[search_date]["uuid"])
-        elif node["uuid"] not in matched_uuids:
-            condensed_nodes.append(node)
+def _group_nodes_by_consecutive_dates(nodes: list):
+    """Groups dictionaries in a list based on consecutive start and end dates within a 1-day tolerance.
+
+    Args:
+        dicts: A list of dictionaries containing 'startDate' and 'endDate' keys.
+
+    Returns:
+        A list of lists, where each inner list contains dictionaries with consecutive start and end dates.
+    """
+    groups = []
+    group = []
+
+    # make sure the nodes are sorted by dates to group by consecutive dates
+    for n in sorted(nodes, key=lambda d: (_parse_date(d, 'startDate'), _parse_date(d, 'endDate'))):
+        if not group or (
+            _should_group_node(n) and
+            _parse_date(n, 'startDate') - _parse_date(group[-1], 'endDate') <= timedelta(days=1)
+        ):
+            group.append(n)
+        else:
+            groups.append(group)
+            group = [n]

-    return condensed_nodes, len(matched_uuids) > 0
+    if group:
+        groups.append(group)
+
+    return groups
+
+
+def _node_from_group(nodes: list):
+    # `nodes` contain list with consecutive dates
+    return nodes[0] if len(nodes) == 1 else nodes[0] | {
+        'startDate': min(n.get('startDate') for n in nodes),
+        'endDate': max(n.get('endDate') for n in nodes)
+    }
+
+
+def _condense_nodes(nodes: list):
+    # `nodes` contain list with same `term.@id` and `value`
+    grouped_nodes = _group_nodes_by_consecutive_dates(nodes)
+    return flatten(map(_node_from_group, grouped_nodes))
+
+
+def _group_nodes_to_condense(nodes: list) -> dict:
+    def _group_node(group: dict, node: dict):
+        value = node.get('value', [])
+        value = '-'.join(map(str, value if isinstance(value, list) else [value]))
+        properties = '_'.join(non_empty_list([
+            ';'.join(non_empty_list([
+                p.get('term', {}).get('@id'),
+                f"{p.get('value')}"
+            ])) for p in node.get('properties', [])
+        ]))
+        # group by term, value, and properties
+        group_key = '-'.join(non_empty_list([
+            node.get('term', {}).get('@id', ''),
+            value,
+            properties
+        ]))
+        group[group_key] = group.get(group_key, []) + [node]
+        return group
+
+    return reduce(_group_node, nodes, {})


 def condense_nodes(nodes: list) -> list:
-    grouped_nodes = _group_nodes_by_term_and_value(nodes)
-    condensed_nodes = dict()
-    any_changes_made = False
-
-    for key, node_group in grouped_nodes.items():
-        condensed_nodes[key] = node_group
-        while len(condensed_nodes[key]) > 1:
-            condensed_nodes[key], changes_made = _get_condensed_nodes(condensed_nodes[key])
-            if not changes_made:
-                break
-            any_changes_made = True
-
-    if not any_changes_made:
-        return [_omit(values=n, keys=["uuid"]) for n in nodes]
-
-    return sorted(
-        flatten([_omit(values=n, keys=["uuid"]) for nodes in condensed_nodes.values() for n in nodes]),
-        key=lambda x: x["startDate"]
-    )
+    grouped_nodes = _group_nodes_to_condense(nodes)
+    return flatten(map(_condense_nodes, grouped_nodes.values()))


-DATE_FORMAT = "%Y-%m-%d"
+def _node_date(node: dict): return parser.isoparse(node.get('endDate', OLDEST_DATE))


-def _variable_length_str_to_date(date_str: str, is_end: bool) -> datetime:
-    """Converts to date, adding start or end of year to YYYY strings as indicated by is_end."""
-    return datetime.strptime(_full_date_str(date_str, is_end=is_end), DATE_FORMAT)
+def _distance(node: dict, date): return abs((_node_date(node) - date).days)


-def _full_date_str(date_str: str, is_end: bool) -> str:
-    suffix = ""
-    if len(date_str) == 4:
-        # Format YYYY
-        suffix = "-12-31" if is_end else "-01-01"
-    elif len(date_str) == 7:
-        # Format YYYY-MM
-        suffix = f"-{calendar.monthrange(int(date_str[:4]), int(date_str[5:7]))[1]}" if is_end else "-01"
+def _most_recent_nodes(nodes: list, date: str) -> list:
+    closest_date = parser.isoparse(date)
+    min_distance = min([_distance(m, closest_date) for m in nodes])
+    return list(filter(lambda m: _distance(m, closest_date) == min_distance, nodes))

-    return date_str + suffix

+def _shallowest_node(nodes: list) -> dict:
+    min_depth = min([m.get('depthUpper', MAX_DEPTH) for m in nodes])
+    return next((m for m in nodes if m.get('depthUpper', MAX_DEPTH) == min_depth), {})

-def _with_full_dates(node: dict) -> dict:
-    output_node = node.copy()
-    if "startDate" in output_node:
-        output_node["startDate"] = _full_date_str(output_node["startDate"], is_end=False)
-    if "endDate" in output_node:
-        output_node["endDate"] = _full_date_str(output_node["endDate"], is_end=True)

-    return output_node
+def most_relevant_blank_node_by_type(nodes: List[dict],
+                                     term_type: Union[TermTermType, str, List[TermTermType], List[str]], date: str):
+    """
+    Given a list of cycle specific dated entries like
+    a list of measurements terms or a list of management terms,
+    find the entry closest to a given date
+    Parameters
+    ----------
+    nodes: List[dict]
+        should contain a 'endDate' field otherwise defaults to OLDEST_DATE
+    term_type : TermTermType or List[TermTermType]
+        The `termType` of the `Term`, or a list of `termType`. Example: `TermTermType.CROP`
+    date: str
+        An ISO-8601 datetime compatible string

+    Returns
+    -------

-def _offset_date(date_str: str, days: int = 1, is_end: bool = True) -> str:
-    return (
-        _variable_length_str_to_date(date_str=date_str, is_end=is_end) + timedelta(days=days)
-    ).strftime(DATE_FORMAT)
+    """
+    filtered_nodes = filter_list_term_type(nodes, term_type)

+    return {} if len(filtered_nodes) == 0 \
+        else _shallowest_node(_most_recent_nodes(filtered_nodes, date)) \
+        if date and len(filtered_nodes) > 1 else filtered_nodes[0]

-def _group_nodes_by_term_and_value(nodes: list) -> dict:
-    grouped_nodes = defaultdict(list)

-    for node in nodes:
-        term_id = node.get("term", {}).get("@id", "")
-        value = "-".join([str(v) for v in node.get("value")]) if isinstance(node.get("value"), list) \
-            else node.get("value")
-        node["uuid"] = uuid4()
-        grouped_nodes[(term_id, value)].append(_with_full_dates(node))
+def most_relevant_blank_node_by_id(nodes: list, term_id: str, date: str):
+    """
+    Given a list of nodes with term_id like
+    a list of measurements terms or a list of management terms,
+    find the entry closest to a given date
+    Parameters
+    ----------
+    nodes: List[dict]
+        should contain a 'endDate' field otherwise defaults to OLDEST_DATE
+    term_id : str
+        the term "@id" of the node we want to match to
+    date: str
+        An ISO-8601 datetime compatible string
+
+    Returns
+    -------

-    return grouped_nodes
+    """
+    filtered_nodes = [m for m in nodes if m.get('term', {}).get('@id') == term_id]
+    return {} if len(filtered_nodes) == 0 \
+        else _shallowest_node(_most_recent_nodes(filtered_nodes, date)) \
+        if date and len(filtered_nodes) > 1 else filtered_nodes[0]
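
The rewritten `condense_nodes` groups nodes by term id, value, and properties, then merges runs whose date ranges are consecutive (a gap of at most one day) into a single node spanning the union, replacing the previous uuid-based pairwise matching. A sketch of the expected behaviour (term ids and dates are illustrative):

    from hestia_earth.models.utils.blank_node import condense_nodes

    nodes = [
        {'term': {'@id': 'cropland'}, 'value': [100], 'startDate': '2019-01-01', 'endDate': '2019-12-31'},
        {'term': {'@id': 'cropland'}, 'value': [100], 'startDate': '2020-01-01', 'endDate': '2020-12-31'},
        {'term': {'@id': 'cropland'}, 'value': [50], 'startDate': '2021-01-01', 'endDate': '2021-12-31'},
    ]
    # The first two nodes share term/value and their ranges touch within 1 day,
    # so they merge into one node from 2019-01-01 to 2020-12-31; the third,
    # with a different value, is left untouched.
    print(condense_nodes(nodes))
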