openforis-whisp 2.0.0a6__py3-none-any.whl → 2.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openforis_whisp/__init__.py +74 -75
- openforis_whisp/data_conversion.py +493 -493
- openforis_whisp/datasets.py +1377 -1384
- openforis_whisp/logger.py +75 -75
- openforis_whisp/parameters/__init__.py +15 -15
- openforis_whisp/parameters/config_runtime.py +44 -44
- openforis_whisp/parameters/lookup_context_and_metadata.csv +13 -13
- openforis_whisp/parameters/lookup_gee_datasets.csv +2 -1
- openforis_whisp/pd_schemas.py +77 -77
- openforis_whisp/reformat.py +696 -495
- openforis_whisp/risk.py +848 -771
- openforis_whisp/stats.py +1228 -1134
- openforis_whisp/utils.py +194 -154
- {openforis_whisp-2.0.0a6.dist-info → openforis_whisp-2.0.0b1.dist-info}/LICENSE +21 -21
- {openforis_whisp-2.0.0a6.dist-info → openforis_whisp-2.0.0b1.dist-info}/METADATA +2 -2
- openforis_whisp-2.0.0b1.dist-info/RECORD +17 -0
- {openforis_whisp-2.0.0a6.dist-info → openforis_whisp-2.0.0b1.dist-info}/WHEEL +1 -1
- openforis_whisp-2.0.0a6.dist-info/RECORD +0 -17
openforis_whisp/logger.py
CHANGED
|
@@ -1,75 +1,75 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import sys
|
|
3
|
-
|
|
4
|
-
BASE_MSG_FORMAT = (
|
|
5
|
-
"[%(filename)s | %(funcName)s() | l.%(lineno)s] %(levelname)s: %(message)s"
|
|
6
|
-
)
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class StdoutLogger:
|
|
10
|
-
def __init__(self, name: str, msg_format: str = BASE_MSG_FORMAT) -> None:
|
|
11
|
-
self.handler = logging.StreamHandler(sys.stdout)
|
|
12
|
-
self.handler.setFormatter(logging.Formatter(msg_format))
|
|
13
|
-
self.handler.setLevel(logging.DEBUG)
|
|
14
|
-
self.logger = logging.getLogger(name)
|
|
15
|
-
self.logger.addHandler(self.handler)
|
|
16
|
-
self.logger.propagate = False
|
|
17
|
-
|
|
18
|
-
# Add missing methods that delegate to the internal logger
|
|
19
|
-
def debug(self, message):
|
|
20
|
-
self.logger.debug(message)
|
|
21
|
-
|
|
22
|
-
def info(self, message):
|
|
23
|
-
self.logger.info(message)
|
|
24
|
-
|
|
25
|
-
def warning(self, message):
|
|
26
|
-
self.logger.warning(message)
|
|
27
|
-
|
|
28
|
-
def error(self, message):
|
|
29
|
-
self.logger.error(message)
|
|
30
|
-
|
|
31
|
-
def critical(self, message):
|
|
32
|
-
self.logger.critical(message)
|
|
33
|
-
|
|
34
|
-
def setLevel(self, level):
|
|
35
|
-
self.logger.setLevel(level)
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class FileLogger:
|
|
39
|
-
def __init__(
|
|
40
|
-
self,
|
|
41
|
-
log_filepath: str,
|
|
42
|
-
msg_format: str = BASE_MSG_FORMAT,
|
|
43
|
-
log_to_stdout: bool = True,
|
|
44
|
-
) -> None:
|
|
45
|
-
self.handler = logging.FileHandler(log_filepath)
|
|
46
|
-
self.handler.setFormatter(logging.Formatter(msg_format))
|
|
47
|
-
self.handler.setLevel(logging.DEBUG)
|
|
48
|
-
self.logger = logging.getLogger(f"{__name__}.file_logger_{log_filepath}")
|
|
49
|
-
self.logger.addHandler(self.handler)
|
|
50
|
-
self.logger.propagate = False
|
|
51
|
-
|
|
52
|
-
if log_to_stdout:
|
|
53
|
-
self.stdout_handler = logging.StreamHandler(sys.stdout)
|
|
54
|
-
self.stdout_handler.setFormatter(logging.Formatter(msg_format))
|
|
55
|
-
self.stdout_handler.setLevel(logging.DEBUG)
|
|
56
|
-
self.logger.addHandler(self.stdout_handler)
|
|
57
|
-
|
|
58
|
-
# Add missing methods for FileLogger too
|
|
59
|
-
def debug(self, message):
|
|
60
|
-
self.logger.debug(message)
|
|
61
|
-
|
|
62
|
-
def info(self, message):
|
|
63
|
-
self.logger.info(message)
|
|
64
|
-
|
|
65
|
-
def warning(self, message):
|
|
66
|
-
self.logger.warning(message)
|
|
67
|
-
|
|
68
|
-
def error(self, message):
|
|
69
|
-
self.logger.error(message)
|
|
70
|
-
|
|
71
|
-
def critical(self, message):
|
|
72
|
-
self.logger.critical(message)
|
|
73
|
-
|
|
74
|
-
def setLevel(self, level):
|
|
75
|
-
self.logger.setLevel(level)
|
|
1
|
+
import logging
|
|
2
|
+
import sys
|
|
3
|
+
|
|
4
|
+
BASE_MSG_FORMAT = (
|
|
5
|
+
"[%(filename)s | %(funcName)s() | l.%(lineno)s] %(levelname)s: %(message)s"
|
|
6
|
+
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class StdoutLogger:
|
|
10
|
+
def __init__(self, name: str, msg_format: str = BASE_MSG_FORMAT) -> None:
|
|
11
|
+
self.handler = logging.StreamHandler(sys.stdout)
|
|
12
|
+
self.handler.setFormatter(logging.Formatter(msg_format))
|
|
13
|
+
self.handler.setLevel(logging.DEBUG)
|
|
14
|
+
self.logger = logging.getLogger(name)
|
|
15
|
+
self.logger.addHandler(self.handler)
|
|
16
|
+
self.logger.propagate = False
|
|
17
|
+
|
|
18
|
+
# Add missing methods that delegate to the internal logger
|
|
19
|
+
def debug(self, message):
|
|
20
|
+
self.logger.debug(message)
|
|
21
|
+
|
|
22
|
+
def info(self, message):
|
|
23
|
+
self.logger.info(message)
|
|
24
|
+
|
|
25
|
+
def warning(self, message):
|
|
26
|
+
self.logger.warning(message)
|
|
27
|
+
|
|
28
|
+
def error(self, message):
|
|
29
|
+
self.logger.error(message)
|
|
30
|
+
|
|
31
|
+
def critical(self, message):
|
|
32
|
+
self.logger.critical(message)
|
|
33
|
+
|
|
34
|
+
def setLevel(self, level):
|
|
35
|
+
self.logger.setLevel(level)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class FileLogger:
|
|
39
|
+
def __init__(
|
|
40
|
+
self,
|
|
41
|
+
log_filepath: str,
|
|
42
|
+
msg_format: str = BASE_MSG_FORMAT,
|
|
43
|
+
log_to_stdout: bool = True,
|
|
44
|
+
) -> None:
|
|
45
|
+
self.handler = logging.FileHandler(log_filepath)
|
|
46
|
+
self.handler.setFormatter(logging.Formatter(msg_format))
|
|
47
|
+
self.handler.setLevel(logging.DEBUG)
|
|
48
|
+
self.logger = logging.getLogger(f"{__name__}.file_logger_{log_filepath}")
|
|
49
|
+
self.logger.addHandler(self.handler)
|
|
50
|
+
self.logger.propagate = False
|
|
51
|
+
|
|
52
|
+
if log_to_stdout:
|
|
53
|
+
self.stdout_handler = logging.StreamHandler(sys.stdout)
|
|
54
|
+
self.stdout_handler.setFormatter(logging.Formatter(msg_format))
|
|
55
|
+
self.stdout_handler.setLevel(logging.DEBUG)
|
|
56
|
+
self.logger.addHandler(self.stdout_handler)
|
|
57
|
+
|
|
58
|
+
# Add missing methods for FileLogger too
|
|
59
|
+
def debug(self, message):
|
|
60
|
+
self.logger.debug(message)
|
|
61
|
+
|
|
62
|
+
def info(self, message):
|
|
63
|
+
self.logger.info(message)
|
|
64
|
+
|
|
65
|
+
def warning(self, message):
|
|
66
|
+
self.logger.warning(message)
|
|
67
|
+
|
|
68
|
+
def error(self, message):
|
|
69
|
+
self.logger.error(message)
|
|
70
|
+
|
|
71
|
+
def critical(self, message):
|
|
72
|
+
self.logger.critical(message)
|
|
73
|
+
|
|
74
|
+
def setLevel(self, level):
|
|
75
|
+
self.logger.setLevel(level)
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
"""
|
|
2
|
-
!!! BAD PRACTICE, ALWAYS IMPORT YOUR MODULES EXPLICITLY !!!
|
|
3
|
-
|
|
4
|
-
Module to gather all parameters.
|
|
5
|
-
|
|
6
|
-
If you use a module import all the functions here you only have 1 call to make
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
# from .config_runtime import *
|
|
10
|
-
|
|
11
|
-
# from .config_asr_url_info import *
|
|
12
|
-
|
|
13
|
-
# from .config_asr_credentials import *
|
|
14
|
-
|
|
15
|
-
# from parameters.config_ceo import *
|
|
1
|
+
"""
|
|
2
|
+
!!! BAD PRACTICE, ALWAYS IMPORT YOUR MODULES EXPLICITLY !!!
|
|
3
|
+
|
|
4
|
+
Module to gather all parameters.
|
|
5
|
+
|
|
6
|
+
If you use a module import all the functions here you only have 1 call to make
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# from .config_runtime import *
|
|
10
|
+
|
|
11
|
+
# from .config_asr_url_info import *
|
|
12
|
+
|
|
13
|
+
# from .config_asr_credentials import *
|
|
14
|
+
|
|
15
|
+
# from parameters.config_ceo import *
|
|
@@ -1,44 +1,44 @@
|
|
|
1
|
-
from pathlib import Path
|
|
2
|
-
|
|
3
|
-
# output column names
|
|
4
|
-
# The names need to align with whisp/parameters/lookup_context_and_metadata.csv
|
|
5
|
-
geometry_area_column = "Area" # Note: datasets.py defines this explicitly as "Area", to allow it to be a standalone script. Default of "Area" aligns with the EU Traces online reporting platform.
|
|
6
|
-
|
|
7
|
-
stats_unit_type_column = "Unit" # name of unit type column in the stats table
|
|
8
|
-
|
|
9
|
-
iso3_country_column = "Country"
|
|
10
|
-
|
|
11
|
-
iso2_country_column = "ProducerCountry" # iso2 country code. Default of "ProducerCountry" aligns with the EU Traces online reporting platform.
|
|
12
|
-
|
|
13
|
-
admin_1_column = "Admin_Level_1"
|
|
14
|
-
|
|
15
|
-
centroid_x_coord_column = "Centroid_lon"
|
|
16
|
-
|
|
17
|
-
centroid_y_coord_column = "Centroid_lat"
|
|
18
|
-
|
|
19
|
-
external_id_column = "external_id"
|
|
20
|
-
|
|
21
|
-
geometry_type_column = "Geometry_type"
|
|
22
|
-
|
|
23
|
-
plot_id_column = "plotId"
|
|
24
|
-
|
|
25
|
-
water_flag = "In_waterbody"
|
|
26
|
-
|
|
27
|
-
geometry_column = "geo" # geometry column name, stored as a string.
|
|
28
|
-
|
|
29
|
-
# reformatting numbers to decimal places (e.g. '%.3f' is 3 dp)
|
|
30
|
-
geometry_area_column_formatting = "%.3f"
|
|
31
|
-
|
|
32
|
-
stats_area_columns_formatting = "%.3f"
|
|
33
|
-
|
|
34
|
-
stats_percent_columns_formatting = "%.1f"
|
|
35
|
-
|
|
36
|
-
# lookup path - for dataset info
|
|
37
|
-
DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH = (
|
|
38
|
-
Path(__file__).parent / "lookup_gee_datasets.csv"
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
# lookup path - for context and metadata info
|
|
42
|
-
DEFAULT_CONTEXT_LOOKUP_TABLE_PATH = (
|
|
43
|
-
Path(__file__).parent / "lookup_context_and_metadata.csv"
|
|
44
|
-
)
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
# output column names
|
|
4
|
+
# The names need to align with whisp/parameters/lookup_context_and_metadata.csv
|
|
5
|
+
geometry_area_column = "Area" # Note: datasets.py defines this explicitly as "Area", to allow it to be a standalone script. Default of "Area" aligns with the EU Traces online reporting platform.
|
|
6
|
+
|
|
7
|
+
stats_unit_type_column = "Unit" # name of unit type column in the stats table
|
|
8
|
+
|
|
9
|
+
iso3_country_column = "Country"
|
|
10
|
+
|
|
11
|
+
iso2_country_column = "ProducerCountry" # iso2 country code. Default of "ProducerCountry" aligns with the EU Traces online reporting platform.
|
|
12
|
+
|
|
13
|
+
admin_1_column = "Admin_Level_1"
|
|
14
|
+
|
|
15
|
+
centroid_x_coord_column = "Centroid_lon"
|
|
16
|
+
|
|
17
|
+
centroid_y_coord_column = "Centroid_lat"
|
|
18
|
+
|
|
19
|
+
external_id_column = "external_id"
|
|
20
|
+
|
|
21
|
+
geometry_type_column = "Geometry_type"
|
|
22
|
+
|
|
23
|
+
plot_id_column = "plotId"
|
|
24
|
+
|
|
25
|
+
water_flag = "In_waterbody"
|
|
26
|
+
|
|
27
|
+
geometry_column = "geo" # geometry column name, stored as a string.
|
|
28
|
+
|
|
29
|
+
# reformatting numbers to decimal places (e.g. '%.3f' is 3 dp)
|
|
30
|
+
geometry_area_column_formatting = "%.3f"
|
|
31
|
+
|
|
32
|
+
stats_area_columns_formatting = "%.3f"
|
|
33
|
+
|
|
34
|
+
stats_percent_columns_formatting = "%.1f"
|
|
35
|
+
|
|
36
|
+
# lookup path - for dataset info
|
|
37
|
+
DEFAULT_GEE_DATASETS_LOOKUP_TABLE_PATH = (
|
|
38
|
+
Path(__file__).parent / "lookup_gee_datasets.csv"
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# lookup path - for context and metadata info
|
|
42
|
+
DEFAULT_CONTEXT_LOOKUP_TABLE_PATH = (
|
|
43
|
+
Path(__file__).parent / "lookup_context_and_metadata.csv"
|
|
44
|
+
)
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
|
|
2
|
-
plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
|
|
3
|
-
external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
|
|
4
|
-
Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
|
|
5
|
-
Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column
|
|
6
|
-
Country,-6,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso3_country_column
|
|
7
|
-
ProducerCountry,-5,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso2_country_column
|
|
8
|
-
Admin_Level_1,-4,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,admin_1_column
|
|
9
|
-
Centroid_lon,-3,,context_and_metadata,context_and_metadata,NA,NA,0,float64,1,1,centroid_x_coord_column
|
|
10
|
-
Centroid_lat,-2,,context_and_metadata,context_and_metadata,NA,NA,0,float64,1,1,centroid_y_coord_column
|
|
11
|
-
Unit,-1,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,stats_unit_type_column
|
|
12
|
-
In_waterbody,0,,context_and_metadata,context_and_metadata,NA,NA,0,bool,1,1,water_flag
|
|
13
|
-
geo,9999,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_column
|
|
1
|
+
name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude_from_output,col_type,is_nullable,is_required,corresponding_variable
|
|
2
|
+
plotId,-10,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,plot_id_column
|
|
3
|
+
external_id,-9,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,0,external_id_column
|
|
4
|
+
Area,-8,,context_and_metadata,context_and_metadata,NA,NA,0,float32,1,1,geometry_area_column
|
|
5
|
+
Geometry_type,-7,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_type_column
|
|
6
|
+
Country,-6,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso3_country_column
|
|
7
|
+
ProducerCountry,-5,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,iso2_country_column
|
|
8
|
+
Admin_Level_1,-4,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,admin_1_column
|
|
9
|
+
Centroid_lon,-3,,context_and_metadata,context_and_metadata,NA,NA,0,float64,1,1,centroid_x_coord_column
|
|
10
|
+
Centroid_lat,-2,,context_and_metadata,context_and_metadata,NA,NA,0,float64,1,1,centroid_y_coord_column
|
|
11
|
+
Unit,-1,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,stats_unit_type_column
|
|
12
|
+
In_waterbody,0,,context_and_metadata,context_and_metadata,NA,NA,0,bool,1,1,water_flag
|
|
13
|
+
geo,9999,,context_and_metadata,context_and_metadata,NA,NA,0,string,1,1,geometry_column
|
|
@@ -2,7 +2,7 @@ name,order,ISO2_code,theme,theme_timber,use_for_risk,use_for_risk_timber,exclude
|
|
|
2
2
|
EUFO_2020,10,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_jrc_gfc_2020_prep
|
|
3
3
|
GLAD_Primary,20,,treecover,primary,1,1,0,float32,1,0,g_glad_pht_prep
|
|
4
4
|
TMF_undist,30,,treecover,primary,1,1,0,float32,1,0,g_jrc_tmf_undisturbed_prep
|
|
5
|
-
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,
|
|
5
|
+
GFC_TC_2020,50,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_gfc_tc_2020_prep
|
|
6
6
|
Forest_FDaP,60,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_glad_gfc_10pc_prep
|
|
7
7
|
ESA_TC_2020,70,,treecover,naturally_reg_2020,1,1,0,float32,1,0,g_esa_worldcover_trees_prep
|
|
8
8
|
TMF_plant,80,,commodities,NA,1,1,0,float32,1,0,g_jrc_tmf_plantation_prep
|
|
@@ -199,3 +199,4 @@ nBR_INPE_TCamz_pasture_2020,2422,BR,commodities,NA,1,1,0,float32,1,0,nbr_terracl
|
|
|
199
199
|
nBR_INPE_TCcer_pasture_2020,2423,BR,commodities,NA,1,1,0,float32,1,0,nbr_terraclass_cer20_ac_prep
|
|
200
200
|
nBR_MapBiomas_col9_pasture_2020,2424,BR,commodities,NA,1,1,0,float32,1,0,nbr_mapbiomasc9_pasture_prep
|
|
201
201
|
nCI_Cocoa_bnetd,3000,CI,commodities,NA,1,1,0,float32,1,0,nci_ocs2020_prep
|
|
202
|
+
|
openforis_whisp/pd_schemas.py
CHANGED
|
@@ -1,77 +1,77 @@
|
|
|
1
|
-
import pandera as pa
|
|
2
|
-
from pandera.typing import DataFrame, Series
|
|
3
|
-
|
|
4
|
-
# Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
|
|
5
|
-
class DataLookupSchema(pa.DataFrameModel):
|
|
6
|
-
|
|
7
|
-
# Ensure the name is unique
|
|
8
|
-
name: Series[str] = pa.Field(unique=True, nullable=False)
|
|
9
|
-
order: Series[int] = pa.Field(nullable=False)
|
|
10
|
-
theme: Series[str] = pa.Field(nullable=True)
|
|
11
|
-
|
|
12
|
-
# Define fields without checks
|
|
13
|
-
use_for_risk: Series[pa.Int | bool] = pa.Field(nullable=True)
|
|
14
|
-
exclude_from_output: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
15
|
-
|
|
16
|
-
# Define col_type without checks
|
|
17
|
-
col_type: Series[str] = pa.Field(nullable=False)
|
|
18
|
-
|
|
19
|
-
is_nullable: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
20
|
-
is_required: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
21
|
-
|
|
22
|
-
corresponding_variable: Series[str] = pa.Field(nullable=True)
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
# For type annotation - not used for validation yet
|
|
26
|
-
data_lookup_type = DataFrame[DataLookupSchema]
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# checks (below) not working currently so using without
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
# import pandera as pa
|
|
33
|
-
# from pandera.typing import DataFrame, Series
|
|
34
|
-
|
|
35
|
-
# # Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
|
|
36
|
-
# class DataLookupSchema(pa.DataFrameModel):
|
|
37
|
-
|
|
38
|
-
# # Ensure the name is unique
|
|
39
|
-
# name: Series[str] = pa.Field(unique=True, nullable=False)
|
|
40
|
-
# order: Series[int] = pa.Field(nullable=False)
|
|
41
|
-
# theme: Series[str] = pa.Field(nullable=True)
|
|
42
|
-
|
|
43
|
-
# # Restrict use_for_risk to 0 or 1, either as int or bool
|
|
44
|
-
# use_for_risk: Series[pa.Int | bool] = pa.Field(
|
|
45
|
-
# checks=pa.Check.isin([0, 1]), # Using 'checks' keyword argument
|
|
46
|
-
# nullable=True
|
|
47
|
-
# )
|
|
48
|
-
|
|
49
|
-
# # Restrict exclude_from_input and exclude_from_output to 0 or 1
|
|
50
|
-
# exclude_from_input: Series[pa.Int | bool] = pa.Field(
|
|
51
|
-
# checks=pa.Check.isin([0, 1]),
|
|
52
|
-
# nullable=False
|
|
53
|
-
# )
|
|
54
|
-
# exclude_from_output: Series[pa.Int | bool] = pa.Field(
|
|
55
|
-
# checks=pa.Check.isin([0, 1]),
|
|
56
|
-
# nullable=False
|
|
57
|
-
# )
|
|
58
|
-
|
|
59
|
-
# # Restrict col_type to specific values
|
|
60
|
-
# col_type: Series[str] = pa.Field(
|
|
61
|
-
# checks=pa.Check.isin(['int', 'int64', 'string', 'float32', 'float64', 'bool']),
|
|
62
|
-
# nullable=False
|
|
63
|
-
# )
|
|
64
|
-
|
|
65
|
-
# is_nullable: Series[pa.Int | bool] = pa.Field(
|
|
66
|
-
# checks=pa.Check.isin([0, 1]),
|
|
67
|
-
# nullable=False
|
|
68
|
-
# )
|
|
69
|
-
# is_required: Series[pa.Int | bool] = pa.Field(
|
|
70
|
-
# checks=pa.Check.isin([0, 1]),
|
|
71
|
-
# nullable=False
|
|
72
|
-
# )
|
|
73
|
-
|
|
74
|
-
# corresponding_variable: Series[str] = pa.Field(nullable=True)
|
|
75
|
-
|
|
76
|
-
# # For type annotation
|
|
77
|
-
# data_lookup_type = DataFrame[DataLookupSchema]
|
|
1
|
+
import pandera as pa
|
|
2
|
+
from pandera.typing import DataFrame, Series
|
|
3
|
+
|
|
4
|
+
# Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
|
|
5
|
+
class DataLookupSchema(pa.DataFrameModel):
|
|
6
|
+
|
|
7
|
+
# Ensure the name is unique
|
|
8
|
+
name: Series[str] = pa.Field(unique=True, nullable=False)
|
|
9
|
+
order: Series[int] = pa.Field(nullable=False)
|
|
10
|
+
theme: Series[str] = pa.Field(nullable=True)
|
|
11
|
+
|
|
12
|
+
# Define fields without checks
|
|
13
|
+
use_for_risk: Series[pa.Int | bool] = pa.Field(nullable=True)
|
|
14
|
+
exclude_from_output: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
15
|
+
|
|
16
|
+
# Define col_type without checks
|
|
17
|
+
col_type: Series[str] = pa.Field(nullable=False)
|
|
18
|
+
|
|
19
|
+
is_nullable: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
20
|
+
is_required: Series[pa.Int | bool] = pa.Field(nullable=False)
|
|
21
|
+
|
|
22
|
+
corresponding_variable: Series[str] = pa.Field(nullable=True)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# For type annotation - not used for validation yet
|
|
26
|
+
data_lookup_type = DataFrame[DataLookupSchema]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# checks (below) not working currently so using without
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# import pandera as pa
|
|
33
|
+
# from pandera.typing import DataFrame, Series
|
|
34
|
+
|
|
35
|
+
# # Define a schema for validating a DataFrame related to GEE (Google Earth Engine) datasets.
|
|
36
|
+
# class DataLookupSchema(pa.DataFrameModel):
|
|
37
|
+
|
|
38
|
+
# # Ensure the name is unique
|
|
39
|
+
# name: Series[str] = pa.Field(unique=True, nullable=False)
|
|
40
|
+
# order: Series[int] = pa.Field(nullable=False)
|
|
41
|
+
# theme: Series[str] = pa.Field(nullable=True)
|
|
42
|
+
|
|
43
|
+
# # Restrict use_for_risk to 0 or 1, either as int or bool
|
|
44
|
+
# use_for_risk: Series[pa.Int | bool] = pa.Field(
|
|
45
|
+
# checks=pa.Check.isin([0, 1]), # Using 'checks' keyword argument
|
|
46
|
+
# nullable=True
|
|
47
|
+
# )
|
|
48
|
+
|
|
49
|
+
# # Restrict exclude_from_input and exclude_from_output to 0 or 1
|
|
50
|
+
# exclude_from_input: Series[pa.Int | bool] = pa.Field(
|
|
51
|
+
# checks=pa.Check.isin([0, 1]),
|
|
52
|
+
# nullable=False
|
|
53
|
+
# )
|
|
54
|
+
# exclude_from_output: Series[pa.Int | bool] = pa.Field(
|
|
55
|
+
# checks=pa.Check.isin([0, 1]),
|
|
56
|
+
# nullable=False
|
|
57
|
+
# )
|
|
58
|
+
|
|
59
|
+
# # Restrict col_type to specific values
|
|
60
|
+
# col_type: Series[str] = pa.Field(
|
|
61
|
+
# checks=pa.Check.isin(['int', 'int64', 'string', 'float32', 'float64', 'bool']),
|
|
62
|
+
# nullable=False
|
|
63
|
+
# )
|
|
64
|
+
|
|
65
|
+
# is_nullable: Series[pa.Int | bool] = pa.Field(
|
|
66
|
+
# checks=pa.Check.isin([0, 1]),
|
|
67
|
+
# nullable=False
|
|
68
|
+
# )
|
|
69
|
+
# is_required: Series[pa.Int | bool] = pa.Field(
|
|
70
|
+
# checks=pa.Check.isin([0, 1]),
|
|
71
|
+
# nullable=False
|
|
72
|
+
# )
|
|
73
|
+
|
|
74
|
+
# corresponding_variable: Series[str] = pa.Field(nullable=True)
|
|
75
|
+
|
|
76
|
+
# # For type annotation
|
|
77
|
+
# data_lookup_type = DataFrame[DataLookupSchema]
|