openforis-whisp 2.0.0a4__tar.gz → 2.0.0a5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/PKG-INFO +37 -46
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/README.md +36 -45
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/pyproject.toml +1 -1
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/data_conversion.py +176 -54
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/datasets.py +6 -3
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/config_runtime.py +1 -1
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/lookup_context_and_metadata.csv +1 -1
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/lookup_gee_datasets.csv +1 -1
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/risk.py +24 -30
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/stats.py +206 -25
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/utils.py +3 -3
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/LICENSE +0 -0
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/__init__.py +0 -0
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/logger.py +0 -0
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/__init__.py +0 -0
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/pd_schemas.py +0 -0
- {openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/reformat.py +0 -0
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openforis-whisp
-Version: 2.0.0a4
+Version: 2.0.0a5
 Summary: Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations.
 License: MIT
 Keywords: whisp,geospatial,data-processing
@@ -77,8 +77,6 @@ Description-Content-Type: text/markdown
 
 
 ## Whisp datasets <a name="whisp_datasets"></a>
-All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
-
 ***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
 
 1) Tree and forest cover (at the end of 2020);
@@ -86,27 +84,39 @@ Description-Content-Type: text/markdown
 3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
 4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
 
+Additional categories are specific for the timber commodity, considering a harvesting date in 2023:
+
+5) Primary forests in 2020;
+6) Naturally regenerating forests in 2020;
+7) Planted and plantation forests in 2020;
+8) Planted and plantation forests in 2023;
+9) Treecover in 2023;
+10) Commodities or croplands in 2023.
+11) Logging concessions;
+
 There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-
+
+### Whisp risk assessment <a name="whisp_risk"></a>
+
+Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
 
 1) Was there tree cover in 2020?
 2) Were there commodity plantations or other agricultural uses in 2020?
 3) Were there disturbances until 2020-12-31?
 4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
 
-
-
-If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
-
-Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
-However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
+And specifically for the timber commodity, considering a harvesting date in 2023:
 
+5) Were there primary forests in 2020?
+6) Were there naturally regenerating forests in 2020?
+7) Were there planted and plantation forests in 2020?
+8) Were there planted and plantation forests in 2023?
+9) Was there treecover in 2023?
+10) Were there commodity plantations or other agricultural uses in 2023?
+11) Is it part of a logging concession?
 
-*The Whisp algorithm for **Perennial Crops** visualized:*
-![]()
 The Whisp algorithm outputs multiple statistical columns with disaggregated data from the input datasets, followed by aggregated indicator columns, and the final risk assessment columns.
+All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
 
 The **relevant risk assessment column depends on the commodity** in question:
 
@@ -141,47 +151,28 @@ The **relevant risk assessment column depends on the commodity** in question:
 </tr>
 </table>
 
-The
-
-
+*The Whisp algorithm for **Perennial Crops** visualized:*
+![]()
+
+If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
+If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
-
-***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
-1) Tree and forest cover (at the end of 2020);
-2) Commodities (i.e., crop plantations and other agricultural uses at the end of 2020);
-3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
-4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
-5) Primary forests in 2020;
-6) Naturally regenerating forests in 2020;
-7) Planted and plantation forests in 2020;
-8) Planted and plantation forests in 2023;
-9) Treecover in 2023;
-10) Commodities or croplands in 2023.
-11) Logging concessions;
+If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
 
-
-
+Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
+However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
 
-1) Was there tree cover in 2020?
-2) Were there commodity plantations or other agricultural uses in 2020?
-3) Were there disturbances until 2020-12-31?
-4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
-5) Were there primary forests in 2020?
-6) Were there naturally regenerating forests in 2020?
-7) Were there planted and plantation forests in 2020?
-8) Were there planted and plantation forests in 2023?
-9) Was there treecover in 2023?
-10) Were there commodity plantations or other agricultural uses in 2023?
-11) Were there logging concessions?
 
-
+## Run Whisp python package from a notebook <a name="whisp_notebooks"></a>
 
 For most users we suggest using the Whisp App to process their plot data. But for some, using the python package directly will fit their workflow.
 
 A simple example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
 
-
+For an example notebook adapted for running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or if datasets are very large, see [whisp_geojson_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_drive.ipynb)
+
+### Requirements for running the package
 
 - A Google Earth Engine (GEE) account.
 - A registered cloud GEE project.
@@ -190,7 +181,7 @@
 More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
 
-
+### Python package installation
 
 The Whisp package is available on pip
 https://pypi.org/project/openforis-whisp/
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/README.md

@@ -40,8 +40,6 @@
 
 
 ## Whisp datasets <a name="whisp_datasets"></a>
-All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
-
 ***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
 
 1) Tree and forest cover (at the end of 2020);
@@ -49,27 +47,39 @@
 3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
 4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
 
+Additional categories are specific for the timber commodity, considering a harvesting date in 2023:
+
+5) Primary forests in 2020;
+6) Naturally regenerating forests in 2020;
+7) Planted and plantation forests in 2020;
+8) Planted and plantation forests in 2023;
+9) Treecover in 2023;
+10) Commodities or croplands in 2023.
+11) Logging concessions;
+
 There are multiple datasets for each category. Find the full current [list of datasets used in Whisp here](https://github.com/forestdatapartnership/whisp/blob/main/layers_description.md).
-
+
+### Whisp risk assessment <a name="whisp_risk"></a>
+
+Whisp checks the plots provided by the user by running zonal statistics on them to answer the following questions:
 
 1) Was there tree cover in 2020?
 2) Were there commodity plantations or other agricultural uses in 2020?
 3) Were there disturbances until 2020-12-31?
 4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
 
-
-
-If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
-
-If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
-
-Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
-However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
+And specifically for the timber commodity, considering a harvesting date in 2023:
 
+5) Were there primary forests in 2020?
+6) Were there naturally regenerating forests in 2020?
+7) Were there planted and plantation forests in 2020?
+8) Were there planted and plantation forests in 2023?
+9) Was there treecover in 2023?
+10) Were there commodity plantations or other agricultural uses in 2023?
+11) Is it part of a logging concession?
 
-*The Whisp algorithm for **Perennial Crops** visualized:*
-![]()
 The Whisp algorithm outputs multiple statistical columns with disaggregated data from the input datasets, followed by aggregated indicator columns, and the final risk assessment columns.
+All output columns from Whisp are described in [this excel file](https://github.com/forestdatapartnership/whisp/blob/main/whisp_columns.xlsx)
 
 The **relevant risk assessment column depends on the commodity** in question:
 
@@ -104,47 +114,28 @@ The **relevant risk assessment column depends on the commodity** in question:
 </tr>
 </table>
 
-The
-
-
+*The Whisp algorithm for **Perennial Crops** visualized:*
+![]()
+
+If no treecover dataset indicates any tree cover for a plot by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
+If one or more treecover datasets indicate tree cover on a plot by the end of 2020, but a commodity dataset indicates agricultural use by the end of 2020, **Whisp will categorize the deforestation risk as low.**
 
-
-***Whisp*** implements the convergence of evidence approach by providing a transparent and public processing flow using datasets covering the following categories:
-1) Tree and forest cover (at the end of 2020);
-2) Commodities (i.e., crop plantations and other agricultural uses at the end of 2020);
-3) Disturbances **before 2020** (i.e., degradation or deforestation until 2020-12-31);
-4) Disturbances **after 2020** (i.e., degradation or deforestation from 2021-01-01 onward).
-5) Primary forests in 2020;
-6) Naturally regenerating forests in 2020;
-7) Planted and plantation forests in 2020;
-8) Planted and plantation forests in 2023;
-9) Treecover in 2023;
-10) Commodities or croplands in 2023.
-11) Logging concessions;
+If treecover datasets indicate tree cover on a plot by late 2020, no commodity datasets indicate agricultural use, but a disturbance dataset indicates disturbances before the end of 2020, **Whisp will categorize the deforestation risk as <u>low</u>.** Such deforestation has happened before 2020, which aligns with the cutoff date for legislation such as EUDR, and is therefore not considered high risk.
 
-
-
+Now, if the datasets under 1., 2. & 3. indicate that there was tree cover, but no agriculture and no disturbances before or by the end of 2020, the Whisp algorithm checks whether degradation or deforestation have been reported in a disturbance dataset after 2020-12-31. If they have, **Whisp will categorize the deforestation risk as <u>high</u>.** <br>
+However, under the same circumstances but with <u>no</u> disturbances reported after 2020-12-31 there is insufficient evidence and the **Whisp output will be "More info needed".** Such can be the case for, e.g., cocoa or coffee grown under the shade of treecover or agroforestry.
 
-1) Was there tree cover in 2020?
-2) Were there commodity plantations or other agricultural uses in 2020?
-3) Were there disturbances until 2020-12-31?
-4) Were there disturbances after 2020-12-31 / starting 2021-01-01?
-5) Were there primary forests in 2020?
-6) Were there naturally regenerating forests in 2020?
-7) Were there planted and plantation forests in 2020?
-8) Were there planted and plantation forests in 2023?
-9) Was there treecover in 2023?
-10) Were there commodity plantations or other agricultural uses in 2023?
-11) Were there logging concessions?
 
-
+## Run Whisp python package from a notebook <a name="whisp_notebooks"></a>
 
 For most users we suggest using the Whisp App to process their plot data. But for some, using the python package directly will fit their workflow.
 
 A simple example of the package functionality can be seen in this [Colab Notebook](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/Colab_whisp_geojson_to_csv.ipynb)
 
-
+For an example notebook adapted for running locally (or in Sepal), see: [whisp_geojson_to_csv.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_csv.ipynb) or if datasets are very large, see [whisp_geojson_to_drive.ipynb](https://github.com/forestdatapartnership/whisp/blob/main/notebooks/whisp_geojson_to_drive.ipynb)
+
+### Requirements for running the package
 
 - A Google Earth Engine (GEE) account.
 - A registered cloud GEE project.
@@ -153,7 +144,7 @@ The **relevant risk assessment column depends on the commodity** in question:
 More info on Whisp can be found in [here](https://openknowledge.fao.org/items/e9284dc7-4b19-4f9c-b3e1-e6c142585865)
 
 
-
+### Python package installation
 
 The Whisp package is available on pip
 https://pypi.org/project/openforis-whisp/
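For context on the README changes above: the four perennial-crop questions map onto a small decision tree. A minimal illustrative sketch of that logic in Python, written from the prose alone (the package's actual implementation is add_eudr_risk_pcrop_col in src/openforis_whisp/risk.py; this standalone function and its argument names are hypothetical):

def pcrop_risk(treecover_2020, commodities_2020, disturbance_before_2020, disturbance_after_2020):
    # Each argument is "yes" or "no", mirroring the indicator columns.
    if treecover_2020 == "no":
        return "low"            # no tree cover by end of 2020
    if commodities_2020 == "yes":
        return "low"            # already agricultural land by end of 2020
    if disturbance_before_2020 == "yes":
        return "low"            # deforestation predates the EUDR cutoff date
    if disturbance_after_2020 == "yes":
        return "high"           # tree cover in 2020, disturbed after the cutoff
    return "more_info_needed"   # e.g. shade-grown cocoa/coffee, agroforestry

print(pcrop_risk("yes", "no", "no", "yes"))  # -> "high"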
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
 name = "openforis-whisp"
-version = "2.0.0a4"
+version = "2.0.0a5"
 description = "Whisp (What is in that plot) is an open-source solution which helps to produce relevant forest monitoring information and support compliance with deforestation-related regulations."
 repository = "https://github.com/forestdatapartnership/whisp"
 authors = ["Andy Arnell <and.arnell@fao.org>"]
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/data_conversion.py

@@ -12,55 +12,32 @@ import geopandas as gpd
 import ee
 
 
-def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
-    """Converts Earth Engine object to geojson.
-
-    Args:
-        ee_object (object): An Earth Engine object.
-        filename (str, optional): The file path to save the geojson. Defaults to None.
-
-    Returns:
-        object: GeoJSON object.
-    """
-
-    try:
-        if (
-            isinstance(ee_object, ee.Geometry)
-            or isinstance(ee_object, ee.Feature)
-            or isinstance(ee_object, ee.FeatureCollection)
-        ):
-            json_object = ee_object.getInfo()
-            if filename is not None:
-                filename = os.path.abspath(filename)
-                if not os.path.exists(os.path.dirname(filename)):
-                    os.makedirs(os.path.dirname(filename))
-                with open(filename, "w") as f:
-                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
-            else:
-                return json_object
-        else:
-            print("Could not convert the Earth Engine object to geojson")
-    except Exception as e:
-        raise Exception(e)
-
-
 def convert_geojson_to_ee(
-    geojson_filepath: Any, enforce_wgs84: bool = True
+    geojson_filepath: Any, enforce_wgs84: bool = True, strip_z_coords: bool = True
 ) -> ee.FeatureCollection:
     """
     Reads a GeoJSON file from the given path and converts it to an Earth Engine FeatureCollection.
     Optionally checks and converts the CRS to WGS 84 (EPSG:4326) if needed.
+    Automatically handles 3D coordinates by stripping Z values when necessary.
 
     Args:
         geojson_filepath (Any): The filepath to the GeoJSON file.
         enforce_wgs84 (bool): Whether to enforce WGS 84 projection (EPSG:4326). Defaults to True.
+        strip_z_coords (bool): Whether to automatically strip Z coordinates from 3D geometries. Defaults to True.
 
     Returns:
         ee.FeatureCollection: Earth Engine FeatureCollection created from the GeoJSON.
     """
     if isinstance(geojson_filepath, (str, Path)):
         file_path = os.path.abspath(geojson_filepath)
-
+
+        # Apply print_once deduplication for file reading message
+        if not hasattr(convert_geojson_to_ee, "_printed_file_messages"):
+            convert_geojson_to_ee._printed_file_messages = set()
+
+        if file_path not in convert_geojson_to_ee._printed_file_messages:
+            print(f"Reading GeoJSON file from: {file_path}")
+            convert_geojson_to_ee._printed_file_messages.add(file_path)
 
         # Use GeoPandas to read the file and handle CRS
         gdf = gpd.read_file(file_path)
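A minimal usage sketch of the updated reader (assumes an authenticated Earth Engine session; the cloud project id and the file name plots.geojson are hypothetical):

import ee
from openforis_whisp.data_conversion import convert_geojson_to_ee

ee.Initialize(project="my-gee-project")  # hypothetical cloud project

# With strip_z_coords=True, 3D (lon, lat, z) input that GEE rejects is
# automatically retried with the Z values removed.
fc = convert_geojson_to_ee("plots.geojson", enforce_wgs84=True, strip_z_coords=True)
print(fc.size().getInfo())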
@@ -82,9 +59,133 @@ def convert_geojson_to_ee(
     if validation_errors:
         raise ValueError(f"GeoJSON validation errors: {validation_errors}")
 
-
+    # Try to create the feature collection, handle 3D coordinate issues automatically
+    try:
+        feature_collection = ee.FeatureCollection(
+            create_feature_collection(geojson_data)
+        )
+        return feature_collection
+    except ee.EEException as e:
+        if "Invalid GeoJSON geometry" in str(e) and strip_z_coords:
+            # Apply print_once deduplication for Z-coordinate stripping messages
+            if not hasattr(convert_geojson_to_ee, "_printed_z_messages"):
+                convert_geojson_to_ee._printed_z_messages = set()
+
+            z_message_key = f"z_coords_{file_path}"
+            if z_message_key not in convert_geojson_to_ee._printed_z_messages:
+                print(
+                    "Warning: Invalid GeoJSON geometry detected, likely due to 3D coordinates."
+                )
+                print("Attempting to fix by stripping Z coordinates...")
+                convert_geojson_to_ee._printed_z_messages.add(z_message_key)
+
+            # Apply Z-coordinate stripping
+            geojson_data_fixed = _strip_z_coordinates_from_geojson(geojson_data)
+
+            # Try again with the fixed data
+            try:
+                feature_collection = ee.FeatureCollection(
+                    create_feature_collection(geojson_data_fixed)
+                )
+
+                success_message_key = f"z_coords_success_{file_path}"
+                if success_message_key not in convert_geojson_to_ee._printed_z_messages:
+                    print("✓ Successfully converted after stripping Z coordinates")
+                    convert_geojson_to_ee._printed_z_messages.add(success_message_key)
+
+                return feature_collection
+            except Exception as retry_error:
+                raise ee.EEException(
+                    f"Failed to convert GeoJSON even after stripping Z coordinates: {retry_error}"
+                )
+        else:
+            raise e
 
-
+
+def _strip_z_coordinates_from_geojson(geojson_data: dict) -> dict:
+    """
+    Helper function to strip Z coordinates from GeoJSON data.
+    Converts 3D coordinates to 2D by removing Z values.
+
+    Args:
+        geojson_data (dict): GeoJSON data dictionary
+
+    Returns:
+        dict: GeoJSON data with Z coordinates stripped
+    """
+
+    def strip_z(geometry):
+        """Remove Z coordinates from geometry to make it 2D"""
+        if geometry["type"] == "MultiPolygon":
+            geometry["coordinates"] = [
+                [[[lon, lat] for lon, lat, *_ in ring] for ring in polygon]
+                for polygon in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Polygon":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in ring]
+                for ring in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "Point":
+            if len(geometry["coordinates"]) > 2:
+                geometry["coordinates"] = geometry["coordinates"][:2]
+        elif geometry["type"] == "MultiPoint":
+            geometry["coordinates"] = [coord[:2] for coord in geometry["coordinates"]]
+        elif geometry["type"] == "LineString":
+            geometry["coordinates"] = [
+                [lon, lat] for lon, lat, *_ in geometry["coordinates"]
+            ]
+        elif geometry["type"] == "MultiLineString":
+            geometry["coordinates"] = [
+                [[lon, lat] for lon, lat, *_ in line]
+                for line in geometry["coordinates"]
+            ]
+        return geometry
+
+    # Create a deep copy to avoid modifying the original
+    import copy
+
+    geojson_copy = copy.deepcopy(geojson_data)
+
+    # Process all features
+    if "features" in geojson_copy:
+        for feature in geojson_copy["features"]:
+            if "geometry" in feature and feature["geometry"]:
+                feature["geometry"] = strip_z(feature["geometry"])
+
+    return geojson_copy
+
+
+def convert_ee_to_geojson(ee_object, filename=None, indent=2, **kwargs):
+    """Converts Earth Engine object to geojson.
+
+    Args:
+        ee_object (object): An Earth Engine object.
+        filename (str, optional): The file path to save the geojson. Defaults to None.
+
+    Returns:
+        object: GeoJSON object.
+    """
+
+    try:
+        if (
+            isinstance(ee_object, ee.Geometry)
+            or isinstance(ee_object, ee.Feature)
+            or isinstance(ee_object, ee.FeatureCollection)
+        ):
+            json_object = ee_object.getInfo()
+            if filename is not None:
+                filename = os.path.abspath(filename)
+                if not os.path.exists(os.path.dirname(filename)):
+                    os.makedirs(os.path.dirname(filename))
+                with open(filename, "w") as f:
+                    f.write(json.dumps(json_object, indent=indent, **kwargs) + "\n")
+            else:
+                return json_object
+        else:
+            print("Could not convert the Earth Engine object to geojson")
+    except Exception as e:
+        raise Exception(e)
 
 
 def convert_geojson_to_shapefile(geojson_path, shapefile_output_path):
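A quick sanity check of the new Z-stripping helper; this is a standalone sketch (the function is private, so normal callers would rely on convert_geojson_to_ee invoking it automatically):

from openforis_whisp.data_conversion import _strip_z_coordinates_from_geojson

fc_3d = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "properties": {"plot": "A"},
            "geometry": {
                "type": "Polygon",
                # Each position carries a Z value (elevation) that GEE rejects
                "coordinates": [[[10.0, 50.0, 120.0], [10.1, 50.0, 121.0],
                                 [10.1, 50.1, 119.5], [10.0, 50.0, 120.0]]],
            },
        }
    ],
}

fc_2d = _strip_z_coordinates_from_geojson(fc_3d)
# The original dict is untouched (deep copy); the result is 2D
assert fc_2d["features"][0]["geometry"]["coordinates"][0][0] == [10.0, 50.0]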
@@ -252,28 +353,49 @@ def validate_geojson(input_data: Any) -> List[str]:
     return errors
 
 
-def extract_features(
+def extract_features(geojson_obj: Any, features: List[Feature]) -> None:
     """
-    Recursively extracts features from a
+    Recursively extracts features from a GeoJSON object and adds them to the feature list.
 
-    :param
+    :param geojson_obj: GeoJSON object (could be geometry, feature, or feature collection)
     :param features: List of extracted features
     """
-    if
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if isinstance(geojson_obj, dict):
+        obj_type = geojson_obj.get("type")
+
+        if obj_type == "Feature":
+            # Extract the actual Feature with properties
+            geometry = geojson_obj.get("geometry", {})
+            properties = geojson_obj.get("properties", {})
+
+            if geometry and geometry.get("type"):
+                features.append(Feature(geometry=geometry, properties=properties))
+
+        elif obj_type == "FeatureCollection":
+            # Process each feature in the collection
+            for feature in geojson_obj.get("features", []):
+                extract_features(feature, features)
+
+        elif obj_type in [
+            "Polygon",
+            "Point",
+            "MultiPolygon",
+            "LineString",
+            "MultiPoint",
+            "MultiLineString",
+        ]:
+            # This is a raw geometry - create feature with empty properties
+            features.append(Feature(geometry=geojson_obj, properties={}))
+
+        elif obj_type == "GeometryCollection":
+            # Handle geometry collections
+            for geom in geojson_obj.get("geometries", []):
+                extract_features(geom, features)
+
+    elif isinstance(geojson_obj, list):
+        # Handle lists of features/geometries
+        for item in geojson_obj:
+            extract_features(item, features)
 
 
 def create_feature_collection(geojson_obj: Any) -> FeatureCollection:
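A short usage sketch for the rebuilt extract_features (the Feature type it appends comes from the module's own imports; only the accumulator list is supplied by the caller):

from openforis_whisp.data_conversion import extract_features

features = []
# A raw geometry gets wrapped in a Feature with empty properties...
extract_features({"type": "Point", "coordinates": [12.5, -3.2]}, features)
# ...while a full Feature keeps its own properties.
extract_features(
    {
        "type": "Feature",
        "properties": {"plot": 1},
        "geometry": {"type": "Point", "coordinates": [13.0, -3.0]},
    },
    features,
)
print(len(features))  # 2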
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/datasets.py

@@ -883,9 +883,9 @@ def nbr_terraclass_amz20_secondary_prep():
 
 # Cerrado - filtered with QGIS because the original geodatabase is too large to export as a shapefile (GEE accepted format)
 def nbr_bfs_cer_f20_prep():
-    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/
+    bfs_fcer20 = ee.FeatureCollection("projects/ee-whisp/assets/NBR/bfs_cerr_2020")
     bfs_fcer20_binary = ee.Image().paint(bfs_fcer20, 1)
-    return bfs_fcer20_binary.rename("
+    return bfs_fcer20_binary.rename("nBR_BFS_primary_and_secondary_forest_Cerrado_2020")
 
 
 # %%
@@ -1264,7 +1264,9 @@ def combine_datasets(national_codes=None):
 
     try:
         # Attempt to print band names to check for errors
-        print(img_combined.bandNames().getInfo())
+        # print(img_combined.bandNames().getInfo())
+        img_combined.bandNames().getInfo()
+
     except ee.EEException as e:
         # logger.error(f"Error printing band names: {e}")
         # logger.info("Running code for filtering to only valid datasets due to error in input")
@@ -1281,6 +1283,7 @@ def combine_datasets(national_codes=None):
         img_combined = img_combined.addBands(img)
 
     img_combined = img_combined.multiply(ee.Image.pixelArea())
+    print("Whisp multiband image compiled")
 
     return img_combined
 
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/lookup_context_and_metadata.csv

@@ -1,6 +1,6 @@
 name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
 plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
-external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,
+external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
 Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
 Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column
 Country,-6,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso3_country_column
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/parameters/lookup_gee_datasets.csv

@@ -179,7 +179,7 @@ nBR_BFS_primary_forest_Pantanal_2020,2402,BR,treecover,primary,1,1,0,float32,1,0
 nBR_BFS_primary_forest_Caatinga_2020,2403,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_caat_f20_prep
 nBR_BFS_primary_forest_AtlanticForest_2020,2404,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_atlf_f20_prep
 nBR_BFS_primary_forest_Pampa_2020,2405,BR,treecover,primary,1,1,0,float32,1,0,nbr_bfs_pmp_f20_prep
-
+nBR_BFS_primary_and_secondary_forest_Cerrado_2020,2406,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_bfs_cer_f20_prep
 nBR_MapBiomas_col9_forest_Brazil_2020,2407,BR,treecover,naturally_reg_2020,1,1,0,float32,1,0,nbr_mapbiomasc9_f20_prep
 nBR_INPE_TCsilviculture_Amazon_2020,2408,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_amz20_silv_prep
 nBR_INPE_TCsilviculture_Cerrado_2020,2409,BR,treecover,planted_plantation_2020,1,1,0,float32,1,0,nbr_terraclass_silv_cer20_prep
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/risk.py

@@ -272,16 +272,12 @@ def whisp_risk(
         df=df_w_indicators,
         ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
         ind_4_name=ind_4_name,
     )
 
     df_w_indicators_and_risk_timber = add_eudr_risk_timber_col(
         df=df_w_indicators,
-        ind_1_name=ind_1_name,
         ind_2_name=ind_2_name,
-        ind_3_name=ind_3_name,
-        ind_4_name=ind_4_name,
         ind_5_name=ind_5_name,
         ind_6_name=ind_6_name,
         ind_7_name=ind_7_name,
@@ -306,10 +302,10 @@ def add_eudr_risk_pcrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str): Name of first indicator column.
-        ind_2_name (str): Name of second indicator column.
-        ind_3_name (str): Name of third indicator column.
-        ind_4_name (str): Name of fourth indicator column.
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_3_name (str, optional): Name of third indicator column. Defaults to "Ind_03_disturbance_before_2020".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
|
|
|
337
333
|
df: data_lookup_type,
|
|
338
334
|
ind_1_name: str,
|
|
339
335
|
ind_2_name: str,
|
|
340
|
-
ind_3_name: str,
|
|
341
336
|
ind_4_name: str,
|
|
342
337
|
) -> data_lookup_type:
|
|
343
338
|
"""
|
|
@@ -345,10 +340,9 @@ def add_eudr_risk_acrop_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-        ind_1_name (str, optional): Name of first indicator column. Defaults to "
-        ind_2_name (str, optional): Name of second indicator column. Defaults to "
-
-        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Indicator_4_disturbance_after_2020".
+        ind_1_name (str, optional): Name of first indicator column. Defaults to "Ind_01_treecover".
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_4_name (str, optional): Name of fourth indicator column. Defaults to "Ind_04_disturbance_after_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
@@ -371,10 +365,7 @@ def add_eudr_risk_acrop_col(
 
 def add_eudr_risk_timber_col(
     df: data_lookup_type,
-    ind_1_name: str,
     ind_2_name: str,
-    ind_3_name: str,
-    ind_4_name: str,
     ind_5_name: str,
     ind_6_name: str,
     ind_7_name: str,
@@ -388,51 +379,54 @@ def add_eudr_risk_timber_col(
 
     Args:
         df (DataFrame): Input DataFrame.
-
-
-
-
+        ind_2_name (str, optional): Name of second indicator column. Defaults to "Ind_02_commodities".
+        ind_5_name (str, optional): Name of fifth indicator column. Defaults to "Ind_05_primary_2020".
+        ind_6_name (str, optional): Name of sixth indicator column. Defaults to "Ind_06_nat_reg_forest_2020".
+        ind_7_name (str, optional): Name of seventh indicator column. Defaults to "Ind_07_planted_plantations_2020".
+        ind_8_name (str, optional): Name of eighth indicator column. Defaults to "Ind_08_planted_plantations_after_2020".
+        ind_9_name (str, optional): Name of ninth indicator column. Defaults to "Ind_09_treecover_after_2020".
+        ind_10_name (str, optional): Name of tenth indicator column. Defaults to "Ind_10_agri_after_2020".
+        ind_11_name (str, optional): Name of eleventh indicator column. Defaults to "Ind_11_logging_concession_before_2020".
 
     Returns:
         DataFrame: DataFrame with added 'EUDR_risk' column.
     """
 
     for index, row in df.iterrows():
-        # If there is a commodity in 2020
+        # If there is a commodity in 2020 (ind_2_name)
+        # OR if there is planted-plantation in 2020 (ind_7_name) AND no agriculture in 2023 (ind_10_name), set EUDR_risk_timber to "low"
        if row[ind_2_name] == "yes" or (
            row[ind_7_name] == "yes" and row[ind_10_name] == "no"
        ):
            df.at[index, "risk_timber"] = "low"
-        # If there is
-        # if row[ind_1_name] == "no" or row[ind_3_name] == "yes" or row[ind_7_name] == "yes":
-        #     df.at[index, 'EUDR_risk_degrad'] = "low"
-        # If primary or naturally regenerating or planted forest in 2020 AND agricultural use in 2023, set EUDR_risk to high
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) or planted forest (ind_7_name) in 2020 AND agricultural after 2020 (ind_10_name), set EUDR_timber to high
        elif (
            row[ind_5_name] == "yes"
            or row[ind_6_name] == "yes"
            or row[ind_7_name] == "yes"
        ) and row[ind_10_name] == "yes":
            df.at[index, "risk_timber"] = "high"
-        # If primary or naturally regenerating AND planted
+        # If there is a natural forest primary (ind_5_name) or naturally regenerating (ind_6_name) AND planted after 2020 (ind_8_name), set EUDR_risk to "high"
        elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and row[
            ind_8_name
        ] == "yes":
            df.at[index, "risk_timber"] = "high"
+        # No data yet on OWL conversion
        # If primary or naturally regenerating or planted forest in 2020 and OWL in 2023, set EUDR_risk to high
        # elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes" or row[ind_7_name] == "yes") and row[ind_10_name] == "yes":
        #     df.at[index, 'EUDR_risk_timber'] = "high"
 
-        # If primary forest OR naturally regenerating AND an information on management practice OR tree cover post 2020, set
+        # If there is a natural primary forest (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) AND an information on management practice any time (ind_11_name) OR tree cover or regrowth post 2020 (ind_9_name), set EUDR_risk_timber to "low"
        elif (row[ind_5_name] == "yes" or row[ind_6_name] == "yes") and (
            row[ind_9_name] == "yes" or row[ind_11_name] == "yes"
        ):
            df.at[index, "risk_timber"] = "low"
-        # If primary
+        # If primary (ind_5_name) OR naturally regenerating in 2020 (ind_6_name) and no other info, set EUDR_risk to "more_info_needed"
        elif row[ind_5_name] == "yes" or row[ind_6_name] == "yes":
            df.at[index, "risk_timber"] = "more_info_needed"
-        # If none of the above conditions are met, set EUDR_risk to "
+        # If none of the above conditions are met, set EUDR_risk to "low"
        else:
-            df.at[index, "risk_timber"] = "
+            df.at[index, "risk_timber"] = "low"
 
     return df
 
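A tiny worked example of the timber decision logic above; the keyword names for the later indicators (ind_8 through ind_11) are inferred from the docstring in this hunk rather than from the full signature, so treat this as a sketch:

import pandas as pd
from openforis_whisp.risk import add_eudr_risk_timber_col

# One plot: naturally regenerating forest in 2020, no commodities, no planting
# or agriculture after 2020, but inside a logging concession (ind_11).
df = pd.DataFrame([{
    "Ind_02_commodities": "no",
    "Ind_05_primary_2020": "no",
    "Ind_06_nat_reg_forest_2020": "yes",
    "Ind_07_planted_plantations_2020": "no",
    "Ind_08_planted_plantations_after_2020": "no",
    "Ind_09_treecover_after_2020": "no",
    "Ind_10_agri_after_2020": "no",
    "Ind_11_logging_concession_before_2020": "yes",
}])

df = add_eudr_risk_timber_col(
    df=df,
    ind_2_name="Ind_02_commodities",
    ind_5_name="Ind_05_primary_2020",
    ind_6_name="Ind_06_nat_reg_forest_2020",
    ind_7_name="Ind_07_planted_plantations_2020",
    ind_8_name="Ind_08_planted_plantations_after_2020",
    ind_9_name="Ind_09_treecover_after_2020",
    ind_10_name="Ind_10_agri_after_2020",
    ind_11_name="Ind_11_logging_concession_before_2020",
)
print(df["risk_timber"].iloc[0])  # "low": concession counts as management info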
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/stats.py

@@ -6,7 +6,7 @@ import json
 import country_converter as coco
 from openforis_whisp.parameters.config_runtime import (
     plot_id_column,
-
+    external_id_column,
     geometry_type_column,
     geometry_area_column,
     geometry_area_column_formatting,
@@ -57,6 +57,8 @@ def whisp_formatted_stats_geojson_to_df(
         The filepath to the GeoJSON of the ROI to analyze.
     external_id_column : str, optional
         The column in the GeoJSON containing external IDs to be preserved in the output DataFrame.
+        This column must exist as a property in ALL features of the GeoJSON file.
+        Use debug_feature_collection_properties() to inspect available properties if you encounter errors.
     remove_geom : bool, default=False
         If True, the geometry of the GeoJSON is removed from the output DataFrame.
     national_codes : list, optional
@@ -369,7 +371,11 @@ def whisp_stats_geojson_to_drive(
 
 
 def whisp_stats_ee_to_ee(
-    feature_collection,
+    feature_collection,
+    external_id_column,
+    national_codes=None,
+    unit_type="ha",
+    keep_properties=None,
 ):
     """
     Process a feature collection to get statistics for each feature.
|
|
|
379
385
|
external_id_column (str): The name of the external ID column to check.
|
|
380
386
|
national_codes (list, optional): List of ISO2 country codes to include national datasets.
|
|
381
387
|
unit_type (str): Whether to use hectares ("ha") or percentage ("percent"), default "ha".
|
|
388
|
+
keep_properties (None, bool, or list, optional): Properties to keep from the input features.
|
|
389
|
+
- None: Remove all properties (default behavior)
|
|
390
|
+
- True: Keep all properties
|
|
391
|
+
- list: Keep only the specified properties
|
|
382
392
|
|
|
383
393
|
Returns:
|
|
384
394
|
ee.FeatureCollection: The output feature collection with statistics.
|
|
385
395
|
"""
|
|
386
396
|
if external_id_column is not None:
|
|
387
397
|
try:
|
|
388
|
-
#
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
feature,
|
|
393
|
-
ee.Feature(
|
|
394
|
-
None
|
|
395
|
-
), # Return an empty feature if the column does not exist
|
|
396
|
-
)
|
|
397
|
-
|
|
398
|
-
feature_collection_with_check = feature_collection.map(check_column_exists)
|
|
399
|
-
size_fc = feature_collection.size()
|
|
400
|
-
valid_feature_count = feature_collection_with_check.filter(
|
|
401
|
-
ee.Filter.notNull([external_id_column])
|
|
402
|
-
).size()
|
|
398
|
+
# Validate that the external_id_column exists in all features
|
|
399
|
+
validation_result = validate_external_id_column(
|
|
400
|
+
feature_collection, external_id_column
|
|
401
|
+
)
|
|
403
402
|
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
403
|
+
if not validation_result["is_valid"]:
|
|
404
|
+
raise ValueError(validation_result["error_message"])
|
|
405
|
+
|
|
406
|
+
# First handle property selection, but preserve the external_id_column
|
|
407
|
+
if keep_properties is not None:
|
|
408
|
+
if keep_properties == True:
|
|
409
|
+
# Keep all properties including external_id_column
|
|
410
|
+
pass # No need to modify feature_collection
|
|
411
|
+
elif isinstance(keep_properties, list):
|
|
412
|
+
# Ensure external_id_column is included in the list
|
|
413
|
+
if external_id_column not in keep_properties:
|
|
414
|
+
keep_properties = keep_properties + [external_id_column]
|
|
415
|
+
feature_collection = feature_collection.select(keep_properties)
|
|
416
|
+
else:
|
|
417
|
+
raise ValueError(
|
|
418
|
+
"keep_properties must be None, True, or a list of property names."
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# Set the external_id with robust null handling
|
|
422
|
+
def set_external_id_safely_and_clean(feature):
|
|
423
|
+
external_id_value = feature.get(external_id_column)
|
|
424
|
+
# Use server-side null checking and string conversion
|
|
425
|
+
external_id_value = ee.Algorithms.If(
|
|
426
|
+
ee.Algorithms.IsEqual(external_id_value, None),
|
|
427
|
+
"unknown",
|
|
428
|
+
ee.String(external_id_value),
|
|
408
429
|
)
|
|
430
|
+
# Create a new feature with the standardized external_id column
|
|
431
|
+
# Note: we use "external_id" as the standardized column name, not the original external_id_column name
|
|
432
|
+
return ee.Feature(feature.set("external_id", external_id_value))
|
|
409
433
|
|
|
410
|
-
# Set the geo_id_column
|
|
411
434
|
feature_collection = feature_collection.map(
|
|
412
|
-
|
|
413
|
-
geo_id_column, ee.String(feature.get(external_id_column))
|
|
414
|
-
)
|
|
435
|
+
set_external_id_safely_and_clean
|
|
415
436
|
)
|
|
416
437
|
|
|
438
|
+
# Finally, clean up to keep only geometry and external_id if keep_properties is None
|
|
439
|
+
if keep_properties is None:
|
|
440
|
+
feature_collection = feature_collection.select(["external_id"])
|
|
441
|
+
|
|
417
442
|
except Exception as e:
|
|
418
443
|
# Handle the exception and provide a helpful error message
|
|
419
444
|
print(
|
|
420
445
|
f"An error occurred when trying to set the external_id_column: {external_id_column}. Error: {e}"
|
|
421
446
|
)
|
|
447
|
+
raise e # Re-raise the exception to stop execution
|
|
448
|
+
else:
|
|
449
|
+
feature_collection = _keep_fc_properties(feature_collection, keep_properties)
|
|
422
450
|
|
|
423
451
|
fc = get_stats(
|
|
424
452
|
feature_collection, national_codes=national_codes, unit_type=unit_type
|
|
@@ -427,6 +455,23 @@ def whisp_stats_ee_to_ee(
|
|
|
427
455
|
return add_id_to_feature_collection(dataset=fc, id_name=plot_id_column)
|
|
428
456
|
|
|
429
457
|
|
|
458
|
+
def _keep_fc_properties(feature_collection, keep_properties):
|
|
459
|
+
# If keep_properties is specified, select only those properties
|
|
460
|
+
if keep_properties is None:
|
|
461
|
+
feature_collection = feature_collection.select([])
|
|
462
|
+
elif keep_properties == True:
|
|
463
|
+
# If keep_properties is true, select all properties
|
|
464
|
+
first_feature_props = feature_collection.first().propertyNames().getInfo()
|
|
465
|
+
feature_collection = feature_collection.select(first_feature_props)
|
|
466
|
+
elif isinstance(keep_properties, list):
|
|
467
|
+
feature_collection = feature_collection.select(keep_properties)
|
|
468
|
+
else:
|
|
469
|
+
raise ValueError(
|
|
470
|
+
"keep_properties must be None, True, or a list of property names."
|
|
471
|
+
)
|
|
472
|
+
return feature_collection
|
|
473
|
+
|
|
474
|
+
|
|
430
475
|
def whisp_stats_ee_to_df(
|
|
431
476
|
feature_collection: ee.FeatureCollection,
|
|
432
477
|
external_id_column=None,
|
|
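A usage sketch of the reworked entry point, matching the signature added above (the asset path and the farm_ref property name are hypothetical; an authenticated GEE session is assumed):

import ee
from openforis_whisp.stats import whisp_stats_ee_to_ee

ee.Initialize(project="my-gee-project")  # hypothetical cloud project
fc = ee.FeatureCollection("users/example/plots")  # hypothetical asset

# keep_properties=["farm_ref"] retains that property alongside the
# standardized external_id column; keep_properties=None would drop all
# input properties except external_id.
stats_fc = whisp_stats_ee_to_ee(
    fc,
    external_id_column="farm_ref",
    national_codes=["BR"],
    unit_type="ha",
    keep_properties=["farm_ref"],
)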
@@ -951,3 +996,139 @@ def convert_iso3_to_iso2(df, iso3_column, iso2_column):
     )
 
     return df
+
+
+def validate_external_id_column(feature_collection, external_id_column):
+    """
+    Validates that the external_id_column exists in all features of the collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to validate
+    external_id_column : str
+        The name of the external ID column to check
+
+    Returns
+    -------
+    dict
+        Dictionary with validation results including:
+        - 'is_valid': bool indicating if column exists in all features
+        - 'total_features': int total number of features
+        - 'features_with_column': int number of features that have the column
+        - 'available_properties': list of properties available in first feature
+        - 'error_message': str error message if validation fails
+    """
+    try:
+        # Get total number of features
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {
+                "is_valid": False,
+                "total_features": 0,
+                "features_with_column": 0,
+                "available_properties": [],
+                "error_message": "Feature collection is empty",
+            }
+
+        # Get available properties from first feature
+        first_feature_props = feature_collection.first().propertyNames().getInfo()
+
+        # Check if external_id_column exists in all features
+        def check_column_exists(feature):
+            has_column = feature.propertyNames().contains(external_id_column)
+            return feature.set("_has_external_id", has_column)
+
+        features_with_check = feature_collection.map(check_column_exists)
+        features_with_column = (
+            features_with_check.filter(ee.Filter.eq("_has_external_id", True))
+            .size()
+            .getInfo()
+        )
+
+        is_valid = features_with_column == total_features
+
+        error_message = None
+        if not is_valid:
+            missing_count = total_features - features_with_column
+            error_message = (
+                f"The column '{external_id_column}' is missing from {missing_count} "
+                f"out of {total_features} features in the collection. "
+                f"Available properties in first feature: {first_feature_props}"
+            )
+
+        return {
+            "is_valid": is_valid,
+            "total_features": total_features,
+            "features_with_column": features_with_column,
+            "available_properties": first_feature_props,
+            "error_message": error_message,
+        }
+
+    except Exception as e:
+        return {
+            "is_valid": False,
+            "total_features": 0,
+            "features_with_column": 0,
+            "available_properties": [],
+            "error_message": f"Error during validation: {str(e)}",
+        }
+
+
+def debug_feature_collection_properties(feature_collection, max_features=5):
+    """
+    Debug helper function to inspect the properties of features in a collection.
+
+    Parameters
+    ----------
+    feature_collection : ee.FeatureCollection
+        The feature collection to inspect
+    max_features : int, optional
+        Maximum number of features to inspect, by default 5
+
+    Returns
+    -------
+    dict
+        Dictionary with debugging information about the feature collection
+    """
+    try:
+        total_features = feature_collection.size().getInfo()
+
+        if total_features == 0:
+            return {"total_features": 0, "error": "Feature collection is empty"}
+
+        # Limit the number of features to inspect
+        features_to_check = min(max_features, total_features)
+        limited_fc = feature_collection.limit(features_to_check)
+
+        # Get properties for each feature
+        def get_feature_properties(feature):
+            return ee.Dictionary(
+                {
+                    "properties": feature.propertyNames(),
+                    "geometry_type": feature.geometry().type(),
+                }
+            )
+
+        feature_info = limited_fc.map(get_feature_properties).getInfo()
+
+        return {
+            "total_features": total_features,
+            "inspected_features": features_to_check,
+            "feature_details": [
+                {
+                    "feature_index": i,
+                    "properties": feature_info["features"][i]["properties"][
+                        "properties"
+                    ],
+                    "geometry_type": feature_info["features"][i]["properties"][
+                        "geometry_type"
+                    ],
+                }
+                for i in range(len(feature_info["features"]))
+            ],
+        }
+
+    except Exception as e:
+        return {"error": f"Error during debugging: {str(e)}"}
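A short sketch of how the two new helpers fit together before running stats (the asset path and property name are hypothetical; an authenticated GEE session is assumed):

import ee
from openforis_whisp.stats import (
    debug_feature_collection_properties,
    validate_external_id_column,
)

ee.Initialize(project="my-gee-project")  # hypothetical cloud project
fc = ee.FeatureCollection("users/example/plots")  # hypothetical asset

result = validate_external_id_column(fc, "farm_ref")
if not result["is_valid"]:
    # Inspect the first few features to see which properties actually exist
    print(debug_feature_collection_properties(fc, max_features=3))
    raise ValueError(result["error_message"])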
{openforis_whisp-2.0.0a4 → openforis_whisp-2.0.0a5}/src/openforis_whisp/utils.py

@@ -113,9 +113,9 @@ def remove_geometry_from_feature_collection(feature_collection):
     return feature_collection_no_geometry
 
 
-# Compute centroids of each polygon
-def get_centroid(feature,
-    keepProperties = [
+# Compute centroids of each polygon including the external_id_column
+def get_centroid(feature, external_id_column="external_id"):
+    keepProperties = [external_id_column]
     # Get the centroid of the feature's geometry.
     centroid = feature.geometry().centroid(1)
     # Return a new Feature, copying properties from the old Feature.