geocif 0.1.52__tar.gz → 0.1.54__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.52/geocif.egg-info → geocif-0.1.54}/PKG-INFO +1 -1
- {geocif-0.1.52 → geocif-0.1.54}/geocif/analysis.py +70 -41
- {geocif-0.1.52 → geocif-0.1.54}/geocif/geocif.py +6 -5
- {geocif-0.1.52 → geocif-0.1.54}/geocif/indices_runner_v2.py +2 -2
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/feature_engineering.py +5 -1
- {geocif-0.1.52 → geocif-0.1.54/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.52 → geocif-0.1.54}/setup.py +1 -1
- {geocif-0.1.52 → geocif-0.1.54}/LICENSE +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/MANIFEST.in +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/README.md +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/constants.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/features.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/geo.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/backup/models.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/cei/indices.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/experiments.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/indices_runner.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/indices_runner_v3.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/logger.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/output.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/stages.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/stats.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/trend.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/ml/xai.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/mm.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/aa.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/automl.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/enso.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/misc.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/utils.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif/viz/plot.py +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/requirements.txt +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/setup.cfg +0 -0
- {geocif-0.1.52 → geocif-0.1.54}/tests/test_geocif.py +0 -0
@@ -93,6 +93,9 @@ class Geoanalysis:
|
|
93
93
|
& (self.df_analysis["Crop"] == self.crop)
|
94
94
|
& (self.df_analysis["Model"] == self.model)
|
95
95
|
]
|
96
|
+
|
97
|
+
# Drop columns that are empty
|
98
|
+
# self.df_analysis = self.df_analysis.dropna(axis=1, how="all")
|
96
99
|
except Exception as e:
|
97
100
|
pass
|
98
101
|
|
@@ -384,7 +387,7 @@ class Geoanalysis:
|
|
384
387
|
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
|
385
388
|
|
386
389
|
# Ensure 'Harvest Year' is numeric
|
387
|
-
df["Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
|
390
|
+
df.loc[:, "Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
|
388
391
|
|
389
392
|
# Extract data
|
390
393
|
y_observed = df["Observed Yield (tn per ha)"]
|
@@ -414,8 +417,15 @@ class Geoanalysis:
|
|
414
417
|
rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
|
415
418
|
mape = mean_absolute_percentage_error(y_observed, y_predicted)
|
416
419
|
r2 = r2_score(y_observed, y_predicted)
|
420
|
+
n_points = len(y_observed) # Number of data points
|
421
|
+
|
422
|
+
textstr = (
|
423
|
+
f"RMSE: {rmse:.2f} tn/ha\n"
|
424
|
+
f"MAPE: {mape:.2%}\n"
|
425
|
+
f"$r^2$: {r2:.2f}\n"
|
426
|
+
f"N: {n_points}"
|
427
|
+
)
|
417
428
|
|
418
|
-
textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\n$r^2$: {r2:.2f}"
|
419
429
|
ax.annotate(
|
420
430
|
textstr,
|
421
431
|
xy=(0.05, 0.95),
|
@@ -485,7 +495,15 @@ class Geoanalysis:
|
|
485
495
|
mape = mean_absolute_percentage_error(y_observed, y_predicted)
|
486
496
|
r2 = r2_score(y_observed, y_predicted)
|
487
497
|
|
488
|
-
|
498
|
+
n_points = len(y_observed) # Number of data points
|
499
|
+
|
500
|
+
textstr = (
|
501
|
+
f"RMSE: {rmse:.2f} tn/ha\n"
|
502
|
+
f"MAPE: {mape:.2%}\n"
|
503
|
+
f"$r^2$: {r2:.2f}\n"
|
504
|
+
f"N: {n_points}"
|
505
|
+
)
|
506
|
+
|
489
507
|
ax.annotate(
|
490
508
|
textstr,
|
491
509
|
xy=(0.05, 0.95),
|
@@ -636,7 +654,7 @@ class Geoanalysis:
|
|
636
654
|
#
|
637
655
|
# """ % of total area """
|
638
656
|
if idx == 0:
|
639
|
-
fname = f"{self.country}_{self.crop}_perc_area.png"
|
657
|
+
fname = f"map_{self.country}_{self.crop}_perc_area.png"
|
640
658
|
col = "% of total Area (ha)"
|
641
659
|
plot.plot_df_shpfile(
|
642
660
|
self.dg, # dataframe containing adm1 name and polygon
|
@@ -652,13 +670,13 @@ class Geoanalysis:
|
|
652
670
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
653
671
|
series="sequential",
|
654
672
|
show_bg=False,
|
655
|
-
annotate_regions=
|
673
|
+
annotate_regions=self.annotate_regions,
|
656
674
|
annotate_region_column=annotate_region_column,
|
657
675
|
loc_legend="lower left",
|
658
676
|
)
|
659
677
|
#
|
660
678
|
""" Unique regions """
|
661
|
-
fname = f"{self.country}_{self.crop}_region_ID.png"
|
679
|
+
fname = f"map_{self.country}_{self.crop}_region_ID.png"
|
662
680
|
col = "Region_ID"
|
663
681
|
df_model[col] = df_model[col].astype(int) + 1
|
664
682
|
if len(df_model["Region_ID"].unique() > 1):
|
@@ -668,27 +686,27 @@ class Geoanalysis:
|
|
668
686
|
for key in df_time_period["Region_ID"].unique()
|
669
687
|
}
|
670
688
|
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
689
|
+
plot.plot_df_shpfile(
|
690
|
+
self.dg, # dataframe containing adm1 name and polygon
|
691
|
+
df_model, # dataframe containing information that will be mapped
|
692
|
+
dict_lup=dict_region,
|
693
|
+
merge_col="Country Region", # Column on which to merge
|
694
|
+
name_country=countries, # Plot global map
|
695
|
+
name_col=col, # Which column to plot
|
696
|
+
dir_out=self.dir_plot / str(year), # Output directory
|
697
|
+
fname=fname, # Output file name
|
698
|
+
label=f"Region Cluster\n{self.crop.title()}",
|
699
|
+
vmin=df_model[col].min(),
|
700
|
+
vmax=df_model[col].max(),
|
701
|
+
cmap=pal.tableau.Tableau_20.mpl_colors,
|
702
|
+
series="qualitative",
|
703
|
+
show_bg=False,
|
704
|
+
alpha_feature=1,
|
705
|
+
use_key=True,
|
706
|
+
annotate_regions=self.annotate_regions,
|
707
|
+
annotate_region_column=annotate_region_column,
|
708
|
+
loc_legend="lower left",
|
709
|
+
)
|
692
710
|
# breakpoint()
|
693
711
|
|
694
712
|
# """ Anomaly """
|
@@ -715,7 +733,7 @@ class Geoanalysis:
|
|
715
733
|
# )
|
716
734
|
|
717
735
|
""" Predicted Yield """
|
718
|
-
fname = f"{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
|
736
|
+
fname = f"map_{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
|
719
737
|
plot.plot_df_shpfile(
|
720
738
|
self.dg, # dataframe containing adm1 name and polygon
|
721
739
|
df_harvest_year, # dataframe containing information that will be mapped
|
@@ -730,7 +748,7 @@ class Geoanalysis:
|
|
730
748
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
731
749
|
series="sequential",
|
732
750
|
show_bg=False,
|
733
|
-
annotate_regions=
|
751
|
+
annotate_regions=self.annotate_regions,
|
734
752
|
annotate_region_column=annotate_region_column,
|
735
753
|
loc_legend="lower left",
|
736
754
|
)
|
@@ -759,7 +777,7 @@ class Geoanalysis:
|
|
759
777
|
# Area
|
760
778
|
# breakpoint()
|
761
779
|
if df_time_period["Area (ha)"].notna().all():
|
762
|
-
fname = f"{self.country}_{self.crop}_{year}_area.png"
|
780
|
+
fname = f"map_{self.country}_{self.crop}_{year}_area.png"
|
763
781
|
plot.plot_df_shpfile(
|
764
782
|
self.dg, # dataframe containing adm1 name and polygon
|
765
783
|
df_time_period, # dataframe containing information that will be mapped
|
@@ -774,7 +792,7 @@ class Geoanalysis:
|
|
774
792
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
775
793
|
series="sequential",
|
776
794
|
show_bg=False,
|
777
|
-
annotate_regions=
|
795
|
+
annotate_regions=self.annotate_regions,
|
778
796
|
loc_legend="lower left",
|
779
797
|
)
|
780
798
|
|
@@ -884,6 +902,7 @@ class Geoanalysis:
|
|
884
902
|
engine="pyogrio",
|
885
903
|
)
|
886
904
|
self.admin_col_name = self.parser.get(country, "admin_col_name")
|
905
|
+
self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
|
887
906
|
|
888
907
|
# If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
|
889
908
|
if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
|
@@ -950,18 +969,28 @@ class RegionalMapper(Geoanalysis):
|
|
950
969
|
con = sqlite3.connect(self.db_path)
|
951
970
|
|
952
971
|
query = "SELECT * FROM regional_metrics"
|
953
|
-
|
972
|
+
try:
|
973
|
+
self.df_regional = pd.read_sql_query(query, con)
|
974
|
+
except:
|
975
|
+
self.logger.error("Failed to read data from regional_metrics")
|
976
|
+
self.df_regional = pd.DataFrame()
|
977
|
+
|
954
978
|
query = "SELECT * FROM regional_metrics_by_year"
|
955
|
-
|
979
|
+
try:
|
980
|
+
self.df_regional_by_year = pd.read_sql_query(query, con)
|
981
|
+
except:
|
982
|
+
self.logger.error("Failed to read data from regional_metrics_by_year")
|
983
|
+
self.df_regional_by_year = pd.DataFrame()
|
956
984
|
|
957
985
|
con.close()
|
958
986
|
|
959
987
|
def clean_data(self):
|
960
988
|
"""Clean and format the data."""
|
961
|
-
self.df_regional
|
962
|
-
self.df_regional["Country"]
|
963
|
-
|
964
|
-
|
989
|
+
if not self.df_regional.empty:
|
990
|
+
self.df_regional["Country"] = (
|
991
|
+
self.df_regional["Country"].str.replace("_", " ").str.title()
|
992
|
+
)
|
993
|
+
self.df_regional["Model"] = self.df_regional["Model"].str.title()
|
965
994
|
|
966
995
|
def plot_heatmap(self):
|
967
996
|
"""Generate heatmaps of MAPE bins vs. % total area bins."""
|
@@ -1070,7 +1099,7 @@ class RegionalMapper(Geoanalysis):
|
|
1070
1099
|
)
|
1071
1100
|
|
1072
1101
|
plt.tight_layout()
|
1073
|
-
plt.savefig(self.dir_analysis / f"
|
1102
|
+
plt.savefig(self.dir_analysis / f"histogram_region_{model}_mape.png", dpi=250)
|
1074
1103
|
plt.close()
|
1075
1104
|
|
1076
1105
|
def plot_mape_map(self):
|
@@ -1101,7 +1130,7 @@ class RegionalMapper(Geoanalysis):
|
|
1101
1130
|
df = df_model[df_model["Country"].isin(countries)]
|
1102
1131
|
self.dg = self.dg[self.dg["ADM0_NAME"].isin(countries)]
|
1103
1132
|
|
1104
|
-
fname = f"
|
1133
|
+
fname = f"map_{crop}_{df_model['Model'].iloc[0]}_mape.png"
|
1105
1134
|
plot.plot_df_shpfile(
|
1106
1135
|
self.dg,
|
1107
1136
|
df,
|
@@ -1116,7 +1145,7 @@ class RegionalMapper(Geoanalysis):
|
|
1116
1145
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
1117
1146
|
series="sequential",
|
1118
1147
|
show_bg=False,
|
1119
|
-
annotate_regions=
|
1148
|
+
annotate_regions=self.annotate_regions,
|
1120
1149
|
loc_legend="lower left",
|
1121
1150
|
)
|
1122
1151
|
|
@@ -1145,7 +1174,7 @@ class RegionalMapper(Geoanalysis):
|
|
1145
1174
|
plt.xticks(rotation=0)
|
1146
1175
|
|
1147
1176
|
plt.tight_layout()
|
1148
|
-
plt.savefig(self.dir_analysis / "
|
1177
|
+
plt.savefig(self.dir_analysis / "bar_mape_by_year.png", dpi=250)
|
1149
1178
|
plt.close()
|
1150
1179
|
|
1151
1180
|
|
@@ -582,11 +582,10 @@ class Geocif:
|
|
582
582
|
}
|
583
583
|
)
|
584
584
|
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
)
|
585
|
+
# Add median yield to dataframe
|
586
|
+
df.loc[:, f"Median {self.target}"] = np.around(
|
587
|
+
df_region[f"Median {self.target}"].values, 3
|
588
|
+
)
|
590
589
|
|
591
590
|
if self.estimate_ci:
|
592
591
|
if self.estimate_ci_for_all or self.forecast_season == self.today_year:
|
@@ -1157,6 +1156,8 @@ class Geocif:
|
|
1157
1156
|
group.index, "Detrended Model Type"
|
1158
1157
|
] = detrended_data.model_type
|
1159
1158
|
|
1159
|
+
if group.empty:
|
1160
|
+
breakpoint()
|
1160
1161
|
# Create categorical classes for target column
|
1161
1162
|
group, new_target_column, bins = fe.classify_target(
|
1162
1163
|
group, self.target, self.number_classes
|
@@ -47,8 +47,8 @@ class cei_runner(base.BaseGeo):
|
|
47
47
|
|
48
48
|
self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
|
49
49
|
self.base_dir = Path(
|
50
|
-
|
51
|
-
r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
|
50
|
+
r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
|
51
|
+
#r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
|
52
52
|
) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
|
53
53
|
self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
|
54
54
|
|
@@ -361,7 +361,11 @@ def classify_target(df, target_col, number_classes):
|
|
361
361
|
new_target_col = f"{target_col}_class"
|
362
362
|
|
363
363
|
# Change the target column to categorical with the specified number of classes
|
364
|
-
df[new_target_col], bins = pd.qcut(df[target_col],
|
364
|
+
df[new_target_col], bins = pd.qcut(df[target_col],
|
365
|
+
q=number_classes,
|
366
|
+
labels=False,
|
367
|
+
retbins=True,
|
368
|
+
duplicates='drop')
|
365
369
|
|
366
370
|
return df, new_target_col, bins
|
367
371
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|