hestia-earth-models 0.64.3__py3-none-any.whl → 0.64.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of hestia-earth-models might be problematic. Click here for more details.
- hestia_earth/models/blonkConsultants2016/ch4ToAirNaturalVegetationBurning.py +5 -9
- hestia_earth/models/blonkConsultants2016/co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +5 -9
- hestia_earth/models/blonkConsultants2016/n2OToAirNaturalVegetationBurningDirect.py +6 -13
- hestia_earth/models/cycle/animal/input/properties.py +6 -0
- hestia_earth/models/cycle/completeness/soilAmendment.py +3 -2
- hestia_earth/models/cycle/concentrateFeed.py +10 -4
- hestia_earth/models/cycle/input/properties.py +6 -0
- hestia_earth/models/cycle/liveAnimal.py +2 -2
- hestia_earth/models/cycle/milkYield.py +3 -3
- hestia_earth/models/cycle/otherSitesArea.py +59 -0
- hestia_earth/models/cycle/otherSitesUnusedDuration.py +9 -8
- hestia_earth/models/cycle/pastureSystem.py +3 -2
- hestia_earth/models/cycle/product/properties.py +6 -0
- hestia_earth/models/cycle/siteArea.py +83 -0
- hestia_earth/models/cycle/stockingDensityAnimalHousingAverage.py +28 -16
- hestia_earth/models/cycle/utils.py +1 -1
- hestia_earth/models/environmentalFootprintV3/soilQualityIndexLandOccupation.py +128 -0
- hestia_earth/models/environmentalFootprintV3/utils.py +17 -0
- hestia_earth/models/fantkeEtAl2016/__init__.py +13 -0
- hestia_earth/models/fantkeEtAl2016/damageToHumanHealthParticulateMatterFormation.py +49 -0
- hestia_earth/models/frischknechtEtAl2000/__init__.py +13 -0
- hestia_earth/models/frischknechtEtAl2000/ionisingRadiationKbqU235Eq.py +90 -0
- hestia_earth/models/ipcc2006/co2ToAirOrganicSoilCultivation.py +17 -6
- hestia_earth/models/ipcc2006/n2OToAirOrganicSoilCultivationDirect.py +17 -6
- hestia_earth/models/ipcc2019/animal/liveweightGain.py +4 -3
- hestia_earth/models/ipcc2019/animal/liveweightPerHead.py +4 -3
- hestia_earth/models/ipcc2019/animal/weightAtMaturity.py +5 -4
- hestia_earth/models/ipcc2019/co2ToAirCarbonStockChange_utils.py +904 -0
- hestia_earth/models/ipcc2019/co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +70 -618
- hestia_earth/models/mocking/search-results.json +390 -318
- hestia_earth/models/pooreNemecek2018/saplings.py +10 -7
- hestia_earth/models/site/management.py +18 -14
- hestia_earth/models/site/soilMeasurement.py +2 -2
- hestia_earth/models/utils/__init__.py +38 -0
- hestia_earth/models/utils/array_builders.py +63 -52
- hestia_earth/models/utils/blank_node.py +137 -82
- hestia_earth/models/utils/descriptive_stats.py +3 -239
- hestia_earth/models/utils/emission.py +6 -2
- hestia_earth/models/utils/feedipedia.py +15 -2
- hestia_earth/models/utils/impact_assessment.py +10 -5
- hestia_earth/models/utils/landCover.py +9 -0
- hestia_earth/models/utils/lookup.py +16 -3
- hestia_earth/models/utils/measurement.py +3 -28
- hestia_earth/models/utils/stats.py +429 -0
- hestia_earth/models/utils/term.py +15 -3
- hestia_earth/models/utils/time_series.py +90 -0
- hestia_earth/models/version.py +1 -1
- {hestia_earth_models-0.64.3.dist-info → hestia_earth_models-0.64.5.dist-info}/METADATA +1 -1
- {hestia_earth_models-0.64.3.dist-info → hestia_earth_models-0.64.5.dist-info}/RECORD +76 -54
- tests/models/blonkConsultants2016/test_ch4ToAirNaturalVegetationBurning.py +2 -2
- tests/models/blonkConsultants2016/test_co2ToAirAboveGroundBiomassStockChangeLandUseChange.py +2 -2
- tests/models/blonkConsultants2016/test_n2OToAirNaturalVegetationBurningDirect.py +2 -2
- tests/models/cycle/completeness/test_soilAmendment.py +1 -1
- tests/models/cycle/test_liveAnimal.py +1 -1
- tests/models/cycle/test_milkYield.py +1 -1
- tests/models/cycle/test_otherSitesArea.py +68 -0
- tests/models/cycle/test_siteArea.py +51 -0
- tests/models/cycle/test_stockingDensityAnimalHousingAverage.py +2 -2
- tests/models/environmentalFootprintV3/test_soilQualityIndexLandOccupation.py +136 -0
- tests/models/fantkeEtAl2016/__init__.py +0 -0
- tests/models/fantkeEtAl2016/test_damageToHumanHealthParticulateMatterFormation.py +20 -0
- tests/models/frischknechtEtAl2000/__init__.py +0 -0
- tests/models/frischknechtEtAl2000/test_ionisingRadiationKbqU235Eq.py +70 -0
- tests/models/ipcc2019/test_co2ToAirCarbonStockChange_utils.py +50 -0
- tests/models/ipcc2019/test_co2ToAirSoilOrganicCarbonStockChangeManagementChange.py +1 -39
- tests/models/pooreNemecek2018/test_saplings.py +1 -1
- tests/models/site/test_management.py +3 -153
- tests/models/utils/test_array_builders.py +67 -6
- tests/models/utils/test_blank_node.py +191 -7
- tests/models/utils/test_descriptive_stats.py +2 -86
- tests/models/utils/test_measurement.py +1 -22
- tests/models/utils/test_stats.py +186 -0
- tests/models/utils/test_time_series.py +88 -0
- {hestia_earth_models-0.64.3.dist-info → hestia_earth_models-0.64.5.dist-info}/LICENSE +0 -0
- {hestia_earth_models-0.64.3.dist-info → hestia_earth_models-0.64.5.dist-info}/WHEEL +0 -0
- {hestia_earth_models-0.64.3.dist-info → hestia_earth_models-0.64.5.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,15 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
1
3
|
from datetime import datetime
|
|
2
4
|
import pytest
|
|
3
5
|
from pytest import mark
|
|
4
6
|
from unittest.mock import patch
|
|
7
|
+
from tests.utils import fixtures_path
|
|
5
8
|
|
|
6
9
|
from hestia_earth.schema import SiteSiteType
|
|
7
10
|
from hestia_earth.utils.tools import parse
|
|
8
|
-
|
|
9
11
|
from hestia_earth.models.utils.blank_node import (
|
|
12
|
+
condense_nodes,
|
|
10
13
|
_calc_datetime_range_intersection_duration,
|
|
11
14
|
_gapfill_datestr,
|
|
12
15
|
_get_datestr_format,
|
|
@@ -19,11 +22,172 @@ from hestia_earth.models.utils.blank_node import (
|
|
|
19
22
|
group_nodes_by_year,
|
|
20
23
|
group_nodes_by_year_and_month,
|
|
21
24
|
GroupNodesByYearMode,
|
|
22
|
-
split_node_by_dates
|
|
25
|
+
split_node_by_dates,
|
|
26
|
+
_most_recent_nodes,
|
|
27
|
+
_shallowest_node
|
|
23
28
|
)
|
|
24
29
|
|
|
25
30
|
|
|
26
31
|
class_path = "hestia_earth.models.utils.blank_node"
|
|
32
|
+
measurement_fixtures_folder = f"{fixtures_path}/utils/measurement"
|
|
33
|
+
|
|
34
|
+
fixtures_path = os.path.join(fixtures_path, 'utils', 'blank_node')
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_condense_nodes():
|
|
38
|
+
with open(f"{fixtures_path}/condense-nodes/original.jsonld", encoding='utf-8') as f:
|
|
39
|
+
original = json.load(f)
|
|
40
|
+
with open(f"{fixtures_path}/condense-nodes/result.jsonld", encoding='utf-8') as f:
|
|
41
|
+
expected = json.load(f)
|
|
42
|
+
|
|
43
|
+
value = condense_nodes(original)
|
|
44
|
+
assert value == expected
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@pytest.mark.parametrize(
|
|
48
|
+
"test_name,input_nodes,expected_output_nodes",
|
|
49
|
+
[
|
|
50
|
+
(
|
|
51
|
+
"No match",
|
|
52
|
+
[
|
|
53
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
54
|
+
"value": [2]},
|
|
55
|
+
{"startDate": "2003", "endDate": "2004", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
56
|
+
"value": [2]},
|
|
57
|
+
],
|
|
58
|
+
[
|
|
59
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
60
|
+
"value": [2]},
|
|
61
|
+
{"startDate": "2003", "endDate": "2004", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
62
|
+
"value": [2]},
|
|
63
|
+
],
|
|
64
|
+
),
|
|
65
|
+
(
|
|
66
|
+
"No continuity",
|
|
67
|
+
[
|
|
68
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
69
|
+
"value": [2]},
|
|
70
|
+
{"startDate": "2004", "endDate": "2005", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
71
|
+
"value": [2]},
|
|
72
|
+
],
|
|
73
|
+
[
|
|
74
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
75
|
+
"value": [2]},
|
|
76
|
+
{"startDate": "2004", "endDate": "2005", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
77
|
+
"value": [2]},
|
|
78
|
+
],
|
|
79
|
+
),
|
|
80
|
+
(
|
|
81
|
+
"No continuity (multiple values differ)",
|
|
82
|
+
[
|
|
83
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
84
|
+
"value": [10, 20]},
|
|
85
|
+
{"startDate": "2003", "endDate": "2004", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
86
|
+
"value": [10, 30]},
|
|
87
|
+
],
|
|
88
|
+
[
|
|
89
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
90
|
+
"value": [10, 20]},
|
|
91
|
+
{"startDate": "2003", "endDate": "2004", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
92
|
+
"value": [10, 30]},
|
|
93
|
+
],
|
|
94
|
+
),
|
|
95
|
+
(
|
|
96
|
+
"2-1 condense (YYYY dates)",
|
|
97
|
+
[
|
|
98
|
+
{"startDate": "2001", "endDate": "2001", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
99
|
+
"value": [10, 20]},
|
|
100
|
+
{"startDate": "2002", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
101
|
+
"value": [10, 20]},
|
|
102
|
+
],
|
|
103
|
+
[
|
|
104
|
+
{"startDate": "2001", "endDate": "2002", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
105
|
+
"value": [10, 20]}
|
|
106
|
+
],
|
|
107
|
+
),
|
|
108
|
+
(
|
|
109
|
+
"4-2 condense (YYYY-MM dates)",
|
|
110
|
+
[
|
|
111
|
+
{"startDate": "2001-01", "endDate": "2002-01", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
112
|
+
"value": [2]},
|
|
113
|
+
{"startDate": "2002-01", "endDate": "2002-03", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
114
|
+
"value": [2]},
|
|
115
|
+
],
|
|
116
|
+
[
|
|
117
|
+
{"startDate": "2001-01", "endDate": "2002-03", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
118
|
+
"value": [2]}
|
|
119
|
+
],
|
|
120
|
+
),
|
|
121
|
+
(
|
|
122
|
+
"2-1 condense (YYYY-MM-DD dates)",
|
|
123
|
+
[
|
|
124
|
+
{"startDate": "2001-01-01", "endDate": "2001-12-31", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
125
|
+
"value": [2]},
|
|
126
|
+
{"startDate": "2002-01-01", "endDate": "2002-05-04", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
127
|
+
"value": [2]},
|
|
128
|
+
],
|
|
129
|
+
[
|
|
130
|
+
{"startDate": "2001-01-01", "endDate": "2002-05-04", "term": {"@id": "treeNutTree", "units": "% area"},
|
|
131
|
+
"value": [2]}
|
|
132
|
+
],
|
|
133
|
+
),
|
|
134
|
+
(
|
|
135
|
+
"3-1-condense",
|
|
136
|
+
[
|
|
137
|
+
{"startDate": "2001-01-01", "endDate": "2001-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
138
|
+
"value": [9]},
|
|
139
|
+
{"startDate": "2002-01-01", "endDate": "2002-10-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
140
|
+
"value": [9]},
|
|
141
|
+
{"startDate": "2002-11-01", "endDate": "2004-04-05", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
142
|
+
"value": [9]},
|
|
143
|
+
],
|
|
144
|
+
[
|
|
145
|
+
{"startDate": "2001-01-01", "endDate": "2004-04-05", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
146
|
+
"value": [9]}
|
|
147
|
+
],
|
|
148
|
+
),
|
|
149
|
+
(
|
|
150
|
+
"3-2-partial-condense",
|
|
151
|
+
[
|
|
152
|
+
{"startDate": "2001-01-01", "endDate": "2001-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
153
|
+
"value": [9]},
|
|
154
|
+
{"startDate": "2012-02-01", "endDate": "2012-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
155
|
+
"value": [9]},
|
|
156
|
+
{"startDate": "2002-01-01", "endDate": "2003-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
157
|
+
"value": [9]},
|
|
158
|
+
],
|
|
159
|
+
[
|
|
160
|
+
{"startDate": "2001-01-01", "endDate": "2003-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
161
|
+
"value": [9]},
|
|
162
|
+
{"startDate": "2012-02-01", "endDate": "2012-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
163
|
+
"value": [9]},
|
|
164
|
+
],
|
|
165
|
+
),
|
|
166
|
+
(
|
|
167
|
+
"7-2-multi-condense",
|
|
168
|
+
[
|
|
169
|
+
{"startDate": "2001-01-01", "endDate": "2001-11-30", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
170
|
+
"value": [7]},
|
|
171
|
+
{"startDate": "2012-02-01", "endDate": "2012-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
172
|
+
"value": [7]},
|
|
173
|
+
{"startDate": "2001-12-01", "endDate": "2001-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
174
|
+
"value": [7]},
|
|
175
|
+
{"startDate": "2002-01-01", "endDate": "2002-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
176
|
+
"value": [7]},
|
|
177
|
+
{"startDate": "2013-01-01", "endDate": "2013-05-20", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
178
|
+
"value": [7]},
|
|
179
|
+
],
|
|
180
|
+
[
|
|
181
|
+
{"startDate": "2001-01-01", "endDate": "2002-12-31", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
182
|
+
"value": [7]},
|
|
183
|
+
{"startDate": "2012-02-01", "endDate": "2013-05-20", "term": {"@id": "bananaPlant", "units": "% area"},
|
|
184
|
+
"value": [7]}
|
|
185
|
+
],
|
|
186
|
+
),
|
|
187
|
+
]
|
|
188
|
+
)
|
|
189
|
+
def test_condense_nodes_(test_name, input_nodes, expected_output_nodes):
|
|
190
|
+
assert condense_nodes(input_nodes) == expected_output_nodes
|
|
27
191
|
|
|
28
192
|
|
|
29
193
|
def test_run_required():
|
|
@@ -978,11 +1142,11 @@ PARAMS_SPLIT_NODE = [
|
|
|
978
1142
|
)
|
|
979
1143
|
]
|
|
980
1144
|
IDS_SPLIT_NODE = [
|
|
981
|
-
"no split
|
|
982
|
-
"no split
|
|
983
|
-
"no split
|
|
984
|
-
"no split
|
|
985
|
-
"no split
|
|
1145
|
+
"no split - empty node",
|
|
1146
|
+
"no split - not enough dates", # len(value) and len(dates) MUST match
|
|
1147
|
+
"no split - startDate & endDate",
|
|
1148
|
+
"no split - non-iterable value", # i.e., on a Management or Animal node.
|
|
1149
|
+
"no split - null value", # i.e., on a Animal node where value is not required.
|
|
986
1150
|
"value & dates",
|
|
987
1151
|
"descriptive statistics",
|
|
988
1152
|
"descriptive statistics w/ bad key" # if descriptive statistic keys have wrong length, don't split them
|
|
@@ -992,3 +1156,23 @@ IDS_SPLIT_NODE = [
|
|
|
992
1156
|
@mark.parametrize("node, expected", PARAMS_SPLIT_NODE, ids=IDS_SPLIT_NODE)
|
|
993
1157
|
def test_split_node_by_dates(node, expected):
|
|
994
1158
|
assert split_node_by_dates(node) == expected
|
|
1159
|
+
|
|
1160
|
+
|
|
1161
|
+
def test_most_recent_measurements():
|
|
1162
|
+
with open(f"{measurement_fixtures_folder}/measurements.jsonld", encoding='utf-8') as f:
|
|
1163
|
+
measurements = json.load(f)
|
|
1164
|
+
|
|
1165
|
+
with open(f"{measurement_fixtures_folder}/most-recent/measurements.jsonld", encoding='utf-8') as f:
|
|
1166
|
+
expected = json.load(f)
|
|
1167
|
+
|
|
1168
|
+
assert _most_recent_nodes(measurements, '2011') == expected
|
|
1169
|
+
|
|
1170
|
+
|
|
1171
|
+
def test_shallowest_measurement():
|
|
1172
|
+
with open(f"{measurement_fixtures_folder}/most-recent/measurements.jsonld", encoding='utf-8') as f:
|
|
1173
|
+
measurements = json.load(f)
|
|
1174
|
+
|
|
1175
|
+
with open(f"{measurement_fixtures_folder}/shallowest/measurement.jsonld", encoding='utf-8') as f:
|
|
1176
|
+
expected = json.load(f)
|
|
1177
|
+
|
|
1178
|
+
assert _shallowest_node(measurements) == expected
|
|
@@ -1,93 +1,9 @@
|
|
|
1
|
-
from numpy import array, inf
|
|
2
|
-
from numpy.testing import assert_almost_equal
|
|
1
|
+
from numpy import array
|
|
3
2
|
from pytest import mark
|
|
4
3
|
|
|
5
4
|
from hestia_earth.schema import MeasurementStatsDefinition
|
|
6
5
|
|
|
7
|
-
from hestia_earth.models.utils.descriptive_stats import (
|
|
8
|
-
_calc_confidence_level, calc_confidence_level_monte_carlo, calc_descriptive_stats, calc_precision_monte_carlo,
|
|
9
|
-
calc_required_iterations_monte_carlo, calc_z_critical
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
# confidence_level, n_sided, z_critical
|
|
13
|
-
CONFIDENCE_INTERVAL_PARAMS = [
|
|
14
|
-
# 1 sided
|
|
15
|
-
(0, 1, -inf),
|
|
16
|
-
(50, 1, 0),
|
|
17
|
-
(80, 1, 0.8416),
|
|
18
|
-
(90, 1, 1.2816),
|
|
19
|
-
(95, 1, 1.6449),
|
|
20
|
-
(99, 1, 2.3263),
|
|
21
|
-
(100, 1, inf),
|
|
22
|
-
# 2 sided
|
|
23
|
-
(0, 2, 0),
|
|
24
|
-
(50, 2, 0.6745),
|
|
25
|
-
(80, 2, 1.2816),
|
|
26
|
-
(90, 2, 1.6449),
|
|
27
|
-
(95, 2, 1.9600),
|
|
28
|
-
(99, 2, 2.5758),
|
|
29
|
-
(100, 2, inf)
|
|
30
|
-
]
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@mark.parametrize(
|
|
34
|
-
"confidence_level, n_sided, z_critical",
|
|
35
|
-
CONFIDENCE_INTERVAL_PARAMS,
|
|
36
|
-
ids=[f"z={z}, n={n}" for _, n, z in CONFIDENCE_INTERVAL_PARAMS]
|
|
37
|
-
)
|
|
38
|
-
def test_calc_confidence_level(confidence_level, n_sided, z_critical):
|
|
39
|
-
result = _calc_confidence_level(z_critical, n_sided=n_sided)
|
|
40
|
-
assert_almost_equal(result, confidence_level, decimal=2)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
@mark.parametrize(
|
|
44
|
-
"confidence_level, n_sided, z_critical",
|
|
45
|
-
CONFIDENCE_INTERVAL_PARAMS,
|
|
46
|
-
ids=[f"conf={conf}, n={n}" for conf, n, _ in CONFIDENCE_INTERVAL_PARAMS]
|
|
47
|
-
)
|
|
48
|
-
def test_calc_z_critical(confidence_level, n_sided, z_critical):
|
|
49
|
-
result = calc_z_critical(confidence_level, n_sided=n_sided)
|
|
50
|
-
assert_almost_equal(result, z_critical, decimal=4)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# confidence_level, n_iterations, precision, sd
|
|
54
|
-
MONTE_CARLO_PARAMS = [
|
|
55
|
-
(95, 80767, 0.01, 1.45),
|
|
56
|
-
(95, 1110, 0.01, 0.17),
|
|
57
|
-
(99, 1917, 0.01, 0.17),
|
|
58
|
-
(50, 102, 100.18, 1500)
|
|
59
|
-
]
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
@mark.parametrize(
|
|
63
|
-
"confidence_level, n_iterations, precision, sd",
|
|
64
|
-
MONTE_CARLO_PARAMS,
|
|
65
|
-
ids=[f"n={n}, prec={prec}, sd={sd}" for _, n, prec, sd in MONTE_CARLO_PARAMS]
|
|
66
|
-
)
|
|
67
|
-
def test_calc_confidence_level_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
68
|
-
result = calc_confidence_level_monte_carlo(n_iterations, precision, sd,)
|
|
69
|
-
assert_almost_equal(result, confidence_level, decimal=2)
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
@mark.parametrize(
|
|
73
|
-
"confidence_level, n_iterations, precision, sd",
|
|
74
|
-
MONTE_CARLO_PARAMS,
|
|
75
|
-
ids=[f"conf={conf}, prec={prec}, sd={sd}" for conf, _, prec, sd in MONTE_CARLO_PARAMS]
|
|
76
|
-
)
|
|
77
|
-
def test_calc_required_iterations_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
78
|
-
result = calc_required_iterations_monte_carlo(confidence_level, precision, sd)
|
|
79
|
-
assert result == n_iterations
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
@mark.parametrize(
|
|
83
|
-
"confidence_level, n_iterations, precision, sd",
|
|
84
|
-
MONTE_CARLO_PARAMS,
|
|
85
|
-
ids=[f"conf={conf}, n={n}, sd={sd}" for conf, n, _, sd in MONTE_CARLO_PARAMS]
|
|
86
|
-
)
|
|
87
|
-
def test_calc_precision_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
88
|
-
result = calc_precision_monte_carlo(confidence_level, n_iterations, sd)
|
|
89
|
-
assert_almost_equal(result, precision, decimal=2)
|
|
90
|
-
|
|
6
|
+
from hestia_earth.models.utils.descriptive_stats import calc_descriptive_stats
|
|
91
7
|
|
|
92
8
|
EXPECTED_FLATTENED = {
|
|
93
9
|
"value": [5],
|
|
@@ -7,8 +7,7 @@ from hestia_earth.schema import MeasurementMethodClassification
|
|
|
7
7
|
from tests.utils import fixtures_path, TERM
|
|
8
8
|
|
|
9
9
|
from hestia_earth.models.utils.measurement import (
|
|
10
|
-
_new_measurement, most_relevant_measurement_value,
|
|
11
|
-
min_measurement_method_classification
|
|
10
|
+
_new_measurement, most_relevant_measurement_value, min_measurement_method_classification
|
|
12
11
|
)
|
|
13
12
|
|
|
14
13
|
class_path = 'hestia_earth.models.utils.measurement'
|
|
@@ -70,26 +69,6 @@ def test_most_relevant_measurement_value_by_year_month_day():
|
|
|
70
69
|
assert most_relevant_measurement_value(measurements, 'soilPh', '2030-01-07') == 2030
|
|
71
70
|
|
|
72
71
|
|
|
73
|
-
def test_most_recent_measurements():
|
|
74
|
-
with open(f"{fixtures_folder}/measurements.jsonld", encoding='utf-8') as f:
|
|
75
|
-
measurements = json.load(f)
|
|
76
|
-
|
|
77
|
-
with open(f"{fixtures_folder}/most-recent/measurements.jsonld", encoding='utf-8') as f:
|
|
78
|
-
expected = json.load(f)
|
|
79
|
-
|
|
80
|
-
assert _most_recent_measurements(measurements, '2011') == expected
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def test_shallowest_measurement():
|
|
84
|
-
with open(f"{fixtures_folder}/most-recent/measurements.jsonld", encoding='utf-8') as f:
|
|
85
|
-
measurements = json.load(f)
|
|
86
|
-
|
|
87
|
-
with open(f"{fixtures_folder}/shallowest/measurement.jsonld", encoding='utf-8') as f:
|
|
88
|
-
expected = json.load(f)
|
|
89
|
-
|
|
90
|
-
assert _shallowest_measurement(measurements) == expected
|
|
91
|
-
|
|
92
|
-
|
|
93
72
|
@mark.parametrize(
|
|
94
73
|
"input, expected",
|
|
95
74
|
[
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
from numpy import inf, sqrt
|
|
2
|
+
from numpy.testing import assert_almost_equal
|
|
3
|
+
from pytest import mark
|
|
4
|
+
|
|
5
|
+
from hestia_earth.models.utils.stats import (
|
|
6
|
+
_calc_confidence_level, add_normal_distributions, calc_confidence_level_monte_carlo, calc_precision_monte_carlo,
|
|
7
|
+
calc_required_iterations_monte_carlo, calc_z_critical, lerp_normal_distributions, subtract_normal_distributions
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# confidence_level, n_sided, z_critical
|
|
12
|
+
CONFIDENCE_INTERVAL_PARAMS = [
|
|
13
|
+
# 1 sided
|
|
14
|
+
(0, 1, -inf),
|
|
15
|
+
(50, 1, 0),
|
|
16
|
+
(80, 1, 0.8416),
|
|
17
|
+
(90, 1, 1.2816),
|
|
18
|
+
(95, 1, 1.6449),
|
|
19
|
+
(99, 1, 2.3263),
|
|
20
|
+
(100, 1, inf),
|
|
21
|
+
# 2 sided
|
|
22
|
+
(0, 2, 0),
|
|
23
|
+
(50, 2, 0.6745),
|
|
24
|
+
(80, 2, 1.2816),
|
|
25
|
+
(90, 2, 1.6449),
|
|
26
|
+
(95, 2, 1.9600),
|
|
27
|
+
(99, 2, 2.5758),
|
|
28
|
+
(100, 2, inf)
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@mark.parametrize(
|
|
33
|
+
"confidence_level, n_sided, z_critical",
|
|
34
|
+
CONFIDENCE_INTERVAL_PARAMS,
|
|
35
|
+
ids=[f"z={z}, n={n}" for _, n, z in CONFIDENCE_INTERVAL_PARAMS]
|
|
36
|
+
)
|
|
37
|
+
def test_calc_confidence_level(confidence_level, n_sided, z_critical):
|
|
38
|
+
result = _calc_confidence_level(z_critical, n_sided=n_sided)
|
|
39
|
+
assert_almost_equal(result, confidence_level, decimal=2)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@mark.parametrize(
|
|
43
|
+
"confidence_level, n_sided, z_critical",
|
|
44
|
+
CONFIDENCE_INTERVAL_PARAMS,
|
|
45
|
+
ids=[f"conf={conf}, n={n}" for conf, n, _ in CONFIDENCE_INTERVAL_PARAMS]
|
|
46
|
+
)
|
|
47
|
+
def test_calc_z_critical(confidence_level, n_sided, z_critical):
|
|
48
|
+
result = calc_z_critical(confidence_level, n_sided=n_sided)
|
|
49
|
+
assert_almost_equal(result, z_critical, decimal=4)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# confidence_level, n_iterations, precision, sd
|
|
53
|
+
MONTE_CARLO_PARAMS = [
|
|
54
|
+
(95, 80767, 0.01, 1.45),
|
|
55
|
+
(95, 1110, 0.01, 0.17),
|
|
56
|
+
(99, 1917, 0.01, 0.17),
|
|
57
|
+
(50, 102, 100.18, 1500)
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@mark.parametrize(
|
|
62
|
+
"confidence_level, n_iterations, precision, sd",
|
|
63
|
+
MONTE_CARLO_PARAMS,
|
|
64
|
+
ids=[f"n={n}, prec={prec}, sd={sd}" for _, n, prec, sd in MONTE_CARLO_PARAMS]
|
|
65
|
+
)
|
|
66
|
+
def test_calc_confidence_level_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
67
|
+
result = calc_confidence_level_monte_carlo(n_iterations, precision, sd,)
|
|
68
|
+
assert_almost_equal(result, confidence_level, decimal=2)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@mark.parametrize(
|
|
72
|
+
"confidence_level, n_iterations, precision, sd",
|
|
73
|
+
MONTE_CARLO_PARAMS,
|
|
74
|
+
ids=[f"conf={conf}, prec={prec}, sd={sd}" for conf, _, prec, sd in MONTE_CARLO_PARAMS]
|
|
75
|
+
)
|
|
76
|
+
def test_calc_required_iterations_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
77
|
+
result = calc_required_iterations_monte_carlo(confidence_level, precision, sd)
|
|
78
|
+
assert result == n_iterations
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@mark.parametrize(
|
|
82
|
+
"confidence_level, n_iterations, precision, sd",
|
|
83
|
+
MONTE_CARLO_PARAMS,
|
|
84
|
+
ids=[f"conf={conf}, n={n}, sd={sd}" for conf, n, _, sd in MONTE_CARLO_PARAMS]
|
|
85
|
+
)
|
|
86
|
+
def test_calc_precision_monte_carlo(confidence_level, n_iterations, precision, sd):
|
|
87
|
+
result = calc_precision_monte_carlo(confidence_level, n_iterations, sd)
|
|
88
|
+
assert_almost_equal(result, precision, decimal=2)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
# mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, diff_mean, diff_sigma
|
|
92
|
+
PARAMS_NORMAL_DIST = [
|
|
93
|
+
# 2 standard normal distributions, perfectly negative correlation
|
|
94
|
+
(0, 1, 0, 1, -1, 0, 0, 0, 2),
|
|
95
|
+
# 2 standard normal distributions, negative correlation
|
|
96
|
+
(0, 1, 0, 1, -0.5, 0, 1, 0, sqrt(3)),
|
|
97
|
+
# 2 standard normal distributions, no correlation
|
|
98
|
+
(0, 1, 0, 1, 0, 0, sqrt(2), 0, sqrt(2)),
|
|
99
|
+
# 2 standard normal distributions, positive correlation
|
|
100
|
+
(0, 1, 0, 1, 0.5, 0, sqrt(3), 0, 1),
|
|
101
|
+
# 2 standard normal distributions, perfectly positive correlation
|
|
102
|
+
(0, 1, 0, 1, 1, 0, 2, 0, 0),
|
|
103
|
+
# different normal distributions, perfectly negative correlation
|
|
104
|
+
(50000, 3000, 45000, 9000, -1, 95000, 6000, 5000, 12000),
|
|
105
|
+
# different normal distributions, no correlation
|
|
106
|
+
(50000, 3000, 45000, 9000, 0, 95000, sqrt(90000000), 5000, sqrt(90000000)),
|
|
107
|
+
# different normal distributions, perfectly positive correlation
|
|
108
|
+
(50000, 3000, 45000, 9000, 1, 95000, 12000, 5000, 6000)
|
|
109
|
+
]
|
|
110
|
+
IDS_ADD_NORMAL_DIST = [
|
|
111
|
+
f"N({mu_1}, {sigma_1}^2) + N({mu_2}, {sigma_2}^2), rho: {rho}"
|
|
112
|
+
for mu_1, sigma_1, mu_2, sigma_2, rho, *_ in PARAMS_NORMAL_DIST
|
|
113
|
+
]
|
|
114
|
+
IDS_SUBTRACT_DIST = [
|
|
115
|
+
f"N({mu_1}, {sigma_1}^2) - N({mu_2}, {sigma_2}^2), rho: {rho}"
|
|
116
|
+
for mu_1, sigma_1, mu_2, sigma_2, rho, *_ in PARAMS_NORMAL_DIST
|
|
117
|
+
]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@mark.parametrize(
|
|
121
|
+
"mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, _diff_mean, _diff_sigma",
|
|
122
|
+
PARAMS_NORMAL_DIST,
|
|
123
|
+
ids=IDS_ADD_NORMAL_DIST
|
|
124
|
+
)
|
|
125
|
+
def test_add_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho, sum_mean, sum_sigma, _diff_mean, _diff_sigma):
|
|
126
|
+
result = add_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho)
|
|
127
|
+
assert result == (sum_mean, sum_sigma)
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@mark.parametrize(
|
|
131
|
+
"mu_1, sigma_1, mu_2, sigma_2, rho, _sum_mean, _sum_sigma, diff_mean, diff_sigma",
|
|
132
|
+
PARAMS_NORMAL_DIST,
|
|
133
|
+
ids=IDS_SUBTRACT_DIST
|
|
134
|
+
)
|
|
135
|
+
def test_subtract_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho, _sum_mean, _sum_sigma, diff_mean, diff_sigma):
|
|
136
|
+
result = subtract_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, rho)
|
|
137
|
+
assert result == (diff_mean, diff_sigma)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma
|
|
141
|
+
PARAMS_LERP_NORMAL_DIST = [
|
|
142
|
+
# 2 standard normal distributions, perfectly negative correlation
|
|
143
|
+
(0, 1, 0, 1, 0, -1, 0, 1),
|
|
144
|
+
(0, 1, 0, 1, 0.5, -1, 0, 0),
|
|
145
|
+
(0, 1, 0, 1, 1, -1, 0, 1),
|
|
146
|
+
# 2 standard normal distributions, no correlation
|
|
147
|
+
(0, 1, 0, 1, 0, 0, 0, 1),
|
|
148
|
+
(0, 1, 0, 1, 0.5, 0, 0, sqrt(0.5)),
|
|
149
|
+
(0, 1, 0, 1, 1, 0, 0, 1),
|
|
150
|
+
# 2 standard normal distributions, perfectly positive correlation
|
|
151
|
+
(0, 1, 0, 1, 0, 1, 0, 1),
|
|
152
|
+
(0, 1, 0, 1, 0.5, 1, 0, 1),
|
|
153
|
+
(0, 1, 0, 1, 1, 1, 0, 1),
|
|
154
|
+
# different normal distributions, perfectly negative correlation
|
|
155
|
+
(10000, 3000, 5000, 2500, -0.5, -1, 12500, 5750),
|
|
156
|
+
(10000, 3000, 5000, 2500, 0, -1, 10000, 3000),
|
|
157
|
+
(10000, 3000, 5000, 2500, 0.5, -1, 7500, 250),
|
|
158
|
+
(10000, 3000, 5000, 2500, 1, -1, 5000, 2500),
|
|
159
|
+
(10000, 3000, 5000, 2500, 1.5, -1, 2500, 5250),
|
|
160
|
+
# different normal distributions, no correlation
|
|
161
|
+
(10000, 3000, 5000, 2500, -0.5, 0, 12500, sqrt(21812500)),
|
|
162
|
+
(10000, 3000, 5000, 2500, 0, 0, 10000, 3000),
|
|
163
|
+
(10000, 3000, 5000, 2500, 0.5, 0, 7500, sqrt(3812500)),
|
|
164
|
+
(10000, 3000, 5000, 2500, 1, 0, 5000, 2500),
|
|
165
|
+
(10000, 3000, 5000, 2500, 1.5, 0, 2500, sqrt(16312500)),
|
|
166
|
+
# different normal distributions, perfectly positive correlation
|
|
167
|
+
(10000, 3000, 5000, 2500, -0.5, 1, 12500, 3250),
|
|
168
|
+
(10000, 3000, 5000, 2500, 0, 1, 10000, 3000),
|
|
169
|
+
(10000, 3000, 5000, 2500, 0.5, 1, 7500, 2750.0),
|
|
170
|
+
(10000, 3000, 5000, 2500, 1, 1, 5000, 2500),
|
|
171
|
+
(10000, 3000, 5000, 2500, 1.5, 1, 2500, 2250)
|
|
172
|
+
]
|
|
173
|
+
IDS_LERP_NORMAL_DIST = [
|
|
174
|
+
f"N({mu_1}, {sigma_1}^2) - N({mu_2}, {sigma_2}^2), alpha: {alpha}, rho: {rho}"
|
|
175
|
+
for mu_1, sigma_1, mu_2, sigma_2, alpha, rho, *_ in PARAMS_LERP_NORMAL_DIST
|
|
176
|
+
]
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
@mark.parametrize(
|
|
180
|
+
"mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma",
|
|
181
|
+
PARAMS_LERP_NORMAL_DIST,
|
|
182
|
+
ids=IDS_LERP_NORMAL_DIST
|
|
183
|
+
)
|
|
184
|
+
def test_lerp_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, alpha, rho, Z_mean, Z_sigma):
|
|
185
|
+
result = lerp_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, alpha, rho)
|
|
186
|
+
assert result == (Z_mean, Z_sigma)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
from numpy import array, e, inf
|
|
2
|
+
from numpy.typing import NDArray
|
|
3
|
+
from numpy.testing import assert_almost_equal
|
|
4
|
+
from hestia_earth.utils.date import YEAR
|
|
5
|
+
from hestia_earth.models.utils.time_series import (
|
|
6
|
+
calc_tau, compute_time_series_correlation_matrix, exponential_decay
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from pytest import mark
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
SEED = 0
|
|
13
|
+
N_ITERATIONS = 10000
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# datestrs, half_life, expected
|
|
17
|
+
PARAMS_COMPUTE_CORRELATION_MATRIX = [
|
|
18
|
+
(
|
|
19
|
+
['2000-01-01', '2000-01-02', '2000-01-03', '2000-01-04', '2000-01-05'],
|
|
20
|
+
1,
|
|
21
|
+
array([
|
|
22
|
+
[1.0, 0.5, 0.25, 0.125, 0.0625],
|
|
23
|
+
[0.5, 1.0, 0.5, 0.25, 0.125],
|
|
24
|
+
[0.25, 0.5, 1.0, 0.5, 0.25],
|
|
25
|
+
[0.125, 0.25, 0.5, 1.0, 0.5],
|
|
26
|
+
[0.0625, 0.125, 0.25, 0.5, 1.0]
|
|
27
|
+
])
|
|
28
|
+
),
|
|
29
|
+
(
|
|
30
|
+
['2000-01-01', '2001-01-01', '2002-01-01', '2003-01-01'],
|
|
31
|
+
20*YEAR,
|
|
32
|
+
array([
|
|
33
|
+
[1.0, 0.965867, 0.932987, 0.901227],
|
|
34
|
+
[0.965867, 1.0, 0.965959, 0.933076],
|
|
35
|
+
[0.932987, 0.965959, 1.0, 0.965959],
|
|
36
|
+
[0.901227, 0.933076, 0.965959, 1.0]
|
|
37
|
+
])
|
|
38
|
+
)
|
|
39
|
+
]
|
|
40
|
+
IDS_COMPUTE_CORRELATION_MATRIX = [
|
|
41
|
+
"dt: 1d, half-life: 1d",
|
|
42
|
+
"dt: 1y, half-life: 20y"
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@mark.parametrize(
|
|
47
|
+
"datestrs, half_life, expected",
|
|
48
|
+
PARAMS_COMPUTE_CORRELATION_MATRIX,
|
|
49
|
+
ids=IDS_COMPUTE_CORRELATION_MATRIX
|
|
50
|
+
)
|
|
51
|
+
def test_compute_time_series_correlation_matrix(datestrs: list[str], half_life: float, expected: NDArray):
|
|
52
|
+
tau = calc_tau(half_life)
|
|
53
|
+
result = compute_time_series_correlation_matrix(
|
|
54
|
+
datestrs,
|
|
55
|
+
decay_fn=lambda dt: exponential_decay(dt, tau=tau)
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
assert_almost_equal(result, expected, decimal=6)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
# half_life, expected
|
|
62
|
+
PARAMS_CALC_TAU = [(0.693147, 1), (1, 1.442695), (20, 28.853901), (YEAR, 526.933543)]
|
|
63
|
+
IDS_CALC_TAU = [half_life for half_life, *_ in PARAMS_CALC_TAU]
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@mark.parametrize("half_life, expected", PARAMS_CALC_TAU, ids=IDS_CALC_TAU)
|
|
67
|
+
def test_calc_tau(half_life: float, expected: float):
|
|
68
|
+
result = calc_tau(half_life)
|
|
69
|
+
assert_almost_equal(result, expected, decimal=6)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
PARAMS_EXPONENTIAL_DECAY = [
|
|
73
|
+
(0, 1, 1, 0, 1),
|
|
74
|
+
(1, 1, 1, 0, e ** -1),
|
|
75
|
+
(inf, 1, 1, 0, 0),
|
|
76
|
+
(YEAR, calc_tau(YEAR), 1.5, -3, -0.75) # 1 year w/ half-life = 1 year and custom min/max
|
|
77
|
+
]
|
|
78
|
+
IDS_EXPONENTIAL_DECAY = [f"t: {t:0.2f}, tau: {tau:0.2f}" for t, tau, *_ in PARAMS_EXPONENTIAL_DECAY]
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
@mark.parametrize(
|
|
82
|
+
"t, tau, initial_value, final_value, expected",
|
|
83
|
+
PARAMS_EXPONENTIAL_DECAY,
|
|
84
|
+
ids=IDS_EXPONENTIAL_DECAY
|
|
85
|
+
)
|
|
86
|
+
def test_exponential_decay(t: float, tau: float, initial_value: float, final_value: float, expected: float):
|
|
87
|
+
result = exponential_decay(t, tau, initial_value, final_value)
|
|
88
|
+
assert_almost_equal(result, expected, decimal=6)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|