geocif 0.1.81__tar.gz → 0.1.83__tar.gz
This diff compares the contents of two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as published in those registries.
- {geocif-0.1.81/geocif.egg-info → geocif-0.1.83}/PKG-INFO +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/indices.py +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif.py +37 -11
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/embedding.py +4 -1
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stages.py +5 -0
- {geocif-0.1.81 → geocif-0.1.83/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/setup.py +1 -1
- {geocif-0.1.81 → geocif-0.1.83}/LICENSE +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/MANIFEST.in +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/README.md +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/analysis.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/constants.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/features.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geo.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/models.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/experiments.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_angola.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_madagascar.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_malawi.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_mozambique.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_south_africa.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zambia.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zimbabwe.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/logger.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/output.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stats.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trend.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/xai.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/mm.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/aa.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/area.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/automl.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/enso.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/eval.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gee_access.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/misc.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/play_xagg.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/reg.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/wolayita_maize_mask.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/utils.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/gt.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/plot.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/tmp.py +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/requirements.txt +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/setup.cfg +0 -0
- {geocif-0.1.81 → geocif-0.1.83}/tests/test_geocif.py +0 -0
geocif/geocif.py

```diff
@@ -945,11 +945,12 @@ class Geocif:
         parts = all_cei_columns[-1].split("_")
         cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
 
-        # For each region, find the column with the longest string in cei_column
-        group_by = ["Region"]
-        groups = df.groupby(group_by)
         if self.use_cumulative_features:
             frames = []
+            # For each region, find the column with the longest string in cei_column
+            group_by = ["Region"]
+            groups = df.groupby(group_by)
+
             for name, group in groups:
                 # Drop columns with all NaNs
                 group.dropna(axis=1, how="all", inplace=True)
```
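For context, the block being rearranged follows the usual pandas per-region pattern: group on Region, clean each group, and collect the pieces. A minimal standalone sketch of that pattern with synthetic data (the column names and the cleaning step are illustrative, not geocif's full logic):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {
        "Region": ["North", "North", "South", "South"],
        "NDVI_1": [0.2, 0.3, np.nan, np.nan],  # entirely NaN within the South group
        "NDVI_2": [0.4, 0.5, 0.6, 0.7],
    }
)

frames = []
group_by = ["Region"]
groups = df.groupby(group_by)
for name, group in groups:
    # Drop columns that are all-NaN within this region only
    group = group.dropna(axis=1, how="all")
    frames.append(group)

# Regions with different surviving columns are re-aligned here (missing cells become NaN)
df_clean = pd.concat(frames)
print(df_clean)
```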
geocif/geocif.py (continued)

```diff
@@ -1019,26 +1020,45 @@ class Geocif:
         # Drop those columns
 
         df = df.drop(columns=cols_to_drop)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<0>", dupes)
         # Hack: If
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
         df = stages.update_feature_names(df, self.method)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<111>", dupes)
         all_cei_columns = self.get_cei_column_names(df)
         # Fill in any missing values with 0
         df.loc[:, all_cei_columns].fillna(0, inplace=True)
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<1>", dupes)
 
         df = fe.compute_last_year_yield(df, self.target)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<2>", dupes)
         df = fe.compute_median_statistics(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<3>", dupes)
         df = fe.compute_user_median_statistics(df, range(2018, 2023))
 
         df = fe.compute_user_median_statistics(df, range(2013, 2018))
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<4>", dupes)
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1053,7 +1073,10 @@ class Geocif:
         df = fe.compute_analogous_yield(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("5", dupes)
         # Create Region_ID column based on Region column category code
         df["Region"] = df["Region"].astype("category")
         if self.cluster_strategy == "single":
```
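The same four-line diagnostic recurs at each checkpoint above: collect the column labels matching AUC_ESI4WK, count them with collections.Counter, and print any label that appears more than once. A standalone sketch of that check as a helper; the function name and the demo frame are hypothetical, not part of geocif:

```python
from collections import Counter

import pandas as pd


def report_duplicate_columns(df: pd.DataFrame, like: str = "AUC_ESI4WK", tag: str = "") -> dict:
    """Return {label: count} for matching column labels that occur more than once."""
    cols = df.filter(like=like).columns.tolist()
    dupes = {name: n for name, n in Counter(cols).items() if n > 1}
    print(tag, dupes)
    return dupes


# A frame with a duplicated label produces a non-empty report
df = pd.DataFrame([[1, 2, 3]], columns=["AUC_ESI4WK_1", "AUC_ESI4WK_1", "Yield"])
report_duplicate_columns(df, tag="<demo>")  # prints: <demo> {'AUC_ESI4WK_1': 2}
```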
geocif/geocif.py (continued)

```diff
@@ -1067,7 +1090,7 @@ class Geocif:
 
         # Region_ID should be type category
         df["Region_ID"] = df["Region_ID"].astype("category")
-
+        breakpoint()
         return df
 
     def execute(self):
@@ -1474,7 +1497,8 @@ class Geocif:
         assert all_files, f"No files found in {_dir_country} with {file_name}"
 
         self.df_inputs = pd.concat(
-            (pd.read_csv(f) for f in all_files)
+            (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
+            ignore_index=True
         )
 
         self.df_inputs = stats.add_statistics(
```
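The rewritten concat call switches to pandas' pyarrow CSV parser and wraps the file list in a tqdm progress bar. A minimal standalone version of that loading pattern; the directory path is a placeholder, and engine="pyarrow" requires the pyarrow package to be installed:

```python
from pathlib import Path

import pandas as pd
from tqdm import tqdm

# Placeholder: any folder of CSV files sharing a common schema
all_files = sorted(Path("data/csvs").glob("*.csv"))

# engine="pyarrow" hands parsing to Apache Arrow, typically faster on large files;
# ignore_index=True renumbers the concatenated rows 0..N-1 instead of keeping each file's index
df_inputs = pd.concat(
    (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
    ignore_index=True,
)
print(df_inputs.shape)
```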
geocif/geocif.py (continued)

```diff
@@ -1486,7 +1510,9 @@ class Geocif:
             [self.target] + self.statistics_columns,
             self.method,
         )
+
         """ Add information on starting and ending time period for each stage"""
+        self.logger.info("Adding starting and ending time period for each stage")
         self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)
 
         self.df_inputs.to_csv(file, index=False)
```
geocif/ml/embedding.py

```diff
@@ -32,7 +32,10 @@ def _compute_correlations(X, y):
         f_series = X[feature]
 
         # Ignore NaN values in either y or f_series
-        mask = ~(np.isnan(y) | np.isnan(f_series))
+        try:
+            mask = ~(np.isnan(y) | np.isnan(f_series))
+        except:
+            breakpoint()
         y_filtered = y[mask]
         f_series_filtered = f_series[mask]
 
```
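The new try/except presumably guards against np.isnan raising a TypeError when y or the feature column is not a float dtype (object or string columns, for example). A small illustration of that failure mode, with pd.isna shown as a dtype-agnostic alternative; this is an observation about numpy/pandas behaviour, not geocif's code:

```python
import numpy as np
import pandas as pd

y = pd.Series([1.0, np.nan, 3.0])
f_numeric = pd.Series([0.5, 2.0, np.nan])
f_object = pd.Series(["a", None, "c"], dtype=object)

# Float dtypes: np.isnan works elementwise
mask = ~(np.isnan(y) | np.isnan(f_numeric))
print(mask.tolist())  # [True, False, False]

# Object/string dtype: np.isnan raises TypeError
try:
    np.isnan(f_object)
except TypeError as exc:
    print("np.isnan failed:", exc)

# pd.isna accepts any dtype, so this mask never raises
mask_safe = ~(pd.isna(y) | pd.isna(f_object))
print(mask_safe.tolist())  # [True, False, True]
```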
geocif/ml/stages.py

```diff
@@ -268,6 +268,11 @@ def update_feature_names(df, method):
         # Saving the result in the dictionary
         stages_info[element] = (cei, start_stage, end_stage, new_column_name)
 
+        # Check if any duplicates exist in the dictionary
+        if len(stages_info) != len(set(stages_info.values())):
+            breakpoint()
+            raise ValueError(f"Duplicate stage information found for {element}")
+        breakpoint()
     # For each column in df, check if it exists in stages_info, and
     # replace it with the new column name
     # Precompute the rename mapping outside the loop
```
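The added check leans on the fact that the dictionary's values are hashable tuples: building a set collapses equal tuples, so a set shorter than the dict means some value repeats. A minimal illustration of that pattern with synthetic entries (the keys and tuples below are made up for the example):

```python
# Detecting repeated values in a dict whose values are hashable tuples
stages_info = {
    "vDTR_7_6_5": ("vDTR", 7, 5, "vDTR Mar 1-Apr 14"),
    "vDTR_4_3_2": ("vDTR", 4, 2, "vDTR Apr 15-May 26"),
}


def has_duplicate_values(d: dict) -> bool:
    # set() collapses equal tuples, so a shorter set means at least one value repeats
    return len(d) != len(set(d.values()))


print(has_duplicate_values(stages_info))  # False

# Map a new key to a value that already exists under another key
stages_info["vDTR_9_8"] = ("vDTR", 7, 5, "vDTR Mar 1-Apr 14")
print(has_duplicate_values(stages_info))  # True
```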