geocif 0.1.53__tar.gz → 0.1.55__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {geocif-0.1.53/geocif.egg-info → geocif-0.1.55}/PKG-INFO +1 -1
  2. {geocif-0.1.53 → geocif-0.1.55}/geocif/analysis.py +80 -51
  3. {geocif-0.1.53 → geocif-0.1.55}/geocif/geocif.py +7 -5
  4. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/feature_engineering.py +5 -4
  5. {geocif-0.1.53 → geocif-0.1.55/geocif.egg-info}/PKG-INFO +1 -1
  6. {geocif-0.1.53 → geocif-0.1.55}/setup.py +1 -1
  7. {geocif-0.1.53 → geocif-0.1.55}/LICENSE +0 -0
  8. {geocif-0.1.53 → geocif-0.1.55}/MANIFEST.in +0 -0
  9. {geocif-0.1.53 → geocif-0.1.55}/README.md +0 -0
  10. {geocif-0.1.53 → geocif-0.1.55}/geocif/__init__.py +0 -0
  11. {geocif-0.1.53 → geocif-0.1.55}/geocif/agmet/__init__.py +0 -0
  12. {geocif-0.1.53 → geocif-0.1.55}/geocif/agmet/geoagmet.py +0 -0
  13. {geocif-0.1.53 → geocif-0.1.55}/geocif/agmet/plot.py +0 -0
  14. {geocif-0.1.53 → geocif-0.1.55}/geocif/agmet/utils.py +0 -0
  15. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/__init__.py +0 -0
  16. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/constants.py +0 -0
  17. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/features.py +0 -0
  18. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/geo.py +0 -0
  19. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/geocif.py +0 -0
  20. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/metadata.py +0 -0
  21. {geocif-0.1.53 → geocif-0.1.55}/geocif/backup/models.py +0 -0
  22. {geocif-0.1.53 → geocif-0.1.55}/geocif/cei/__init__.py +0 -0
  23. {geocif-0.1.53 → geocif-0.1.55}/geocif/cei/definitions.py +0 -0
  24. {geocif-0.1.53 → geocif-0.1.55}/geocif/cei/indices.py +0 -0
  25. {geocif-0.1.53 → geocif-0.1.55}/geocif/experiments.py +0 -0
  26. {geocif-0.1.53 → geocif-0.1.55}/geocif/geocif_runner.py +0 -0
  27. {geocif-0.1.53 → geocif-0.1.55}/geocif/indices_runner.py +0 -0
  28. {geocif-0.1.53 → geocif-0.1.55}/geocif/indices_runner_v2.py +0 -0
  29. {geocif-0.1.53 → geocif-0.1.55}/geocif/indices_runner_v3.py +0 -0
  30. {geocif-0.1.53 → geocif-0.1.55}/geocif/logger.py +0 -0
  31. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/__init__.py +0 -0
  32. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/correlations.py +0 -0
  33. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/embedding.py +0 -0
  34. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/feature_selection.py +0 -0
  35. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/outliers.py +0 -0
  36. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/outlook.py +0 -0
  37. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/output.py +0 -0
  38. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/spatial_autocorrelation.py +0 -0
  39. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/stages.py +0 -0
  40. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/stats.py +0 -0
  41. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/trainers.py +0 -0
  42. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/trend.py +0 -0
  43. {geocif-0.1.53 → geocif-0.1.55}/geocif/ml/xai.py +0 -0
  44. {geocif-0.1.53 → geocif-0.1.55}/geocif/mm.py +0 -0
  45. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/__init__.py +0 -0
  46. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/aa.py +0 -0
  47. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/automl.py +0 -0
  48. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/download_esi.py +0 -0
  49. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/enso.py +0 -0
  50. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/gamtest.py +0 -0
  51. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/misc.py +0 -0
  52. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/sustain.py +0 -0
  53. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/test_catboost.py +0 -0
  54. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/tmp.py +0 -0
  55. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/tmp2.py +0 -0
  56. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/tmp3.py +0 -0
  57. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/tmp4.py +0 -0
  58. {geocif-0.1.53 → geocif-0.1.55}/geocif/playground/tmp5.py +0 -0
  59. {geocif-0.1.53 → geocif-0.1.55}/geocif/risk/__init__.py +0 -0
  60. {geocif-0.1.53 → geocif-0.1.55}/geocif/risk/impact_assessment.py +0 -0
  61. {geocif-0.1.53 → geocif-0.1.55}/geocif/utils.py +0 -0
  62. {geocif-0.1.53 → geocif-0.1.55}/geocif/viz/__init__.py +0 -0
  63. {geocif-0.1.53 → geocif-0.1.55}/geocif/viz/plot.py +0 -0
  64. {geocif-0.1.53 → geocif-0.1.55}/geocif.egg-info/SOURCES.txt +0 -0
  65. {geocif-0.1.53 → geocif-0.1.55}/geocif.egg-info/dependency_links.txt +0 -0
  66. {geocif-0.1.53 → geocif-0.1.55}/geocif.egg-info/not-zip-safe +0 -0
  67. {geocif-0.1.53 → geocif-0.1.55}/geocif.egg-info/top_level.txt +0 -0
  68. {geocif-0.1.53 → geocif-0.1.55}/requirements.txt +0 -0
  69. {geocif-0.1.53 → geocif-0.1.55}/setup.cfg +0 -0
  70. {geocif-0.1.53 → geocif-0.1.55}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.53
3
+ Version: 0.1.55
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -93,6 +93,9 @@ class Geoanalysis:
93
93
  & (self.df_analysis["Crop"] == self.crop)
94
94
  & (self.df_analysis["Model"] == self.model)
95
95
  ]
96
+
97
+ # Drop columns that are empty
98
+ # self.df_analysis = self.df_analysis.dropna(axis=1, how="all")
96
99
  except Exception as e:
97
100
  pass
98
101
 
@@ -384,7 +387,7 @@ class Geoanalysis:
384
387
  from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
385
388
 
386
389
  # Ensure 'Harvest Year' is numeric
387
- df["Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
390
+ df.loc[:, "Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
388
391
 
389
392
  # Extract data
390
393
  y_observed = df["Observed Yield (tn per ha)"]
@@ -414,8 +417,15 @@ class Geoanalysis:
414
417
  rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
415
418
  mape = mean_absolute_percentage_error(y_observed, y_predicted)
416
419
  r2 = r2_score(y_observed, y_predicted)
420
+ n_points = len(y_observed) # Number of data points
421
+
422
+ textstr = (
423
+ f"RMSE: {rmse:.2f} tn/ha\n"
424
+ f"MAPE: {mape:.2%}\n"
425
+ f"$r^2$: {r2:.2f}\n"
426
+ f"N: {n_points}"
427
+ )
417
428
 
418
- textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\n$r^2$: {r2:.2f}"
419
429
  ax.annotate(
420
430
  textstr,
421
431
  xy=(0.05, 0.95),
@@ -485,7 +495,15 @@ class Geoanalysis:
485
495
  mape = mean_absolute_percentage_error(y_observed, y_predicted)
486
496
  r2 = r2_score(y_observed, y_predicted)
487
497
 
488
- textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\nr²: {r2:.2f}"
498
+ n_points = len(y_observed) # Number of data points
499
+
500
+ textstr = (
501
+ f"RMSE: {rmse:.2f} tn/ha\n"
502
+ f"MAPE: {mape:.2%}\n"
503
+ f"$r^2$: {r2:.2f}\n"
504
+ f"N: {n_points}"
505
+ )
506
+
489
507
  ax.annotate(
490
508
  textstr,
491
509
  xy=(0.05, 0.95),
@@ -636,7 +654,7 @@ class Geoanalysis:
636
654
  #
637
655
  # """ % of total area """
638
656
  if idx == 0:
639
- fname = f"{self.country}_{self.crop}_perc_area.png"
657
+ fname = f"map_{self.country}_{self.crop}_perc_area.png"
640
658
  col = "% of total Area (ha)"
641
659
  plot.plot_df_shpfile(
642
660
  self.dg, # dataframe containing adm1 name and polygon
@@ -652,13 +670,13 @@ class Geoanalysis:
652
670
  cmap=pal.scientific.sequential.Bamako_20_r,
653
671
  series="sequential",
654
672
  show_bg=False,
655
- annotate_regions=True,
673
+ annotate_regions=self.annotate_regions,
656
674
  annotate_region_column=annotate_region_column,
657
675
  loc_legend="lower left",
658
676
  )
659
677
  #
660
678
  """ Unique regions """
661
- fname = f"{self.country}_{self.crop}_region_ID.png"
679
+ fname = f"map_{self.country}_{self.crop}_region_ID.png"
662
680
  col = "Region_ID"
663
681
  df_model[col] = df_model[col].astype(int) + 1
664
682
  if len(df_model["Region_ID"].unique() > 1):
@@ -668,27 +686,27 @@ class Geoanalysis:
668
686
  for key in df_time_period["Region_ID"].unique()
669
687
  }
670
688
 
671
- # plot.plot_df_shpfile(
672
- # self.dg, # dataframe containing adm1 name and polygon
673
- # df_model, # dataframe containing information that will be mapped
674
- # dict_lup=dict_region,
675
- # merge_col="Country Region", # Column on which to merge
676
- # name_country=countries, # Plot global map
677
- # name_col=col, # Which column to plot
678
- # dir_out=self.dir_plot / str(year), # Output directory
679
- # fname=fname, # Output file name
680
- # label=f"Region Cluster\n{self.crop.title()}",
681
- # vmin=df_model[col].min(),
682
- # vmax=df_model[col].max(),
683
- # cmap=pal.tableau.Tableau_20.mpl_colors,
684
- # series="qualitative",
685
- # show_bg=False,
686
- # alpha_feature=1,
687
- # use_key=True,
688
- # annotate_regions=True,
689
- # annotate_region_column=annotate_region_column,
690
- # loc_legend="lower left",
691
- # )
689
+ plot.plot_df_shpfile(
690
+ self.dg, # dataframe containing adm1 name and polygon
691
+ df_model, # dataframe containing information that will be mapped
692
+ dict_lup=dict_region,
693
+ merge_col="Country Region", # Column on which to merge
694
+ name_country=countries, # Plot global map
695
+ name_col=col, # Which column to plot
696
+ dir_out=self.dir_plot / str(year), # Output directory
697
+ fname=fname, # Output file name
698
+ label=f"Region Cluster\n{self.crop.title()}",
699
+ vmin=df_model[col].min(),
700
+ vmax=df_model[col].max(),
701
+ cmap=pal.tableau.Tableau_20.mpl_colors,
702
+ series="qualitative",
703
+ show_bg=False,
704
+ alpha_feature=1,
705
+ use_key=True,
706
+ annotate_regions=self.annotate_regions,
707
+ annotate_region_column=annotate_region_column,
708
+ loc_legend="lower left",
709
+ )
692
710
  # breakpoint()
693
711
 
694
712
  # """ Anomaly """
@@ -715,7 +733,7 @@ class Geoanalysis:
715
733
  # )
716
734
 
717
735
  """ Predicted Yield """
718
- fname = f"{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
736
+ fname = f"map_{fname_prefix}_{self.crop}_{time_period}_{year}_predicted_yield.png"
719
737
  plot.plot_df_shpfile(
720
738
  self.dg, # dataframe containing adm1 name and polygon
721
739
  df_harvest_year, # dataframe containing information that will be mapped
@@ -730,7 +748,7 @@ class Geoanalysis:
730
748
  cmap=pal.scientific.sequential.Bamako_20_r,
731
749
  series="sequential",
732
750
  show_bg=False,
733
- annotate_regions=True,
751
+ annotate_regions=self.annotate_regions,
734
752
  annotate_region_column=annotate_region_column,
735
753
  loc_legend="lower left",
736
754
  )
@@ -759,7 +777,7 @@ class Geoanalysis:
759
777
  # Area
760
778
  # breakpoint()
761
779
  if df_time_period["Area (ha)"].notna().all():
762
- fname = f"{self.country}_{self.crop}_{year}_area.png"
780
+ fname = f"map_{self.country}_{self.crop}_{year}_area.png"
763
781
  plot.plot_df_shpfile(
764
782
  self.dg, # dataframe containing adm1 name and polygon
765
783
  df_time_period, # dataframe containing information that will be mapped
@@ -774,7 +792,7 @@ class Geoanalysis:
774
792
  cmap=pal.scientific.sequential.Bamako_20_r,
775
793
  series="sequential",
776
794
  show_bg=False,
777
- annotate_regions=True,
795
+ annotate_regions=self.annotate_regions,
778
796
  loc_legend="lower left",
779
797
  )
780
798
 
@@ -884,6 +902,7 @@ class Geoanalysis:
884
902
  engine="pyogrio",
885
903
  )
886
904
  self.admin_col_name = self.parser.get(country, "admin_col_name")
905
+ self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
887
906
 
888
907
  # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
889
908
  if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
@@ -950,18 +969,28 @@ class RegionalMapper(Geoanalysis):
950
969
  con = sqlite3.connect(self.db_path)
951
970
 
952
971
  query = "SELECT * FROM regional_metrics"
953
- self.df_regional = pd.read_sql_query(query, con)
972
+ try:
973
+ self.df_regional = pd.read_sql_query(query, con)
974
+ except:
975
+ self.logger.error("Failed to read data from regional_metrics")
976
+ self.df_regional = pd.DataFrame()
977
+
954
978
  query = "SELECT * FROM regional_metrics_by_year"
955
- self.df_regional_by_year = pd.read_sql_query(query, con)
979
+ try:
980
+ self.df_regional_by_year = pd.read_sql_query(query, con)
981
+ except:
982
+ self.logger.error("Failed to read data from regional_metrics_by_year")
983
+ self.df_regional_by_year = pd.DataFrame()
956
984
 
957
985
  con.close()
958
986
 
959
987
  def clean_data(self):
960
988
  """Clean and format the data."""
961
- self.df_regional["Country"] = (
962
- self.df_regional["Country"].str.replace("_", " ").str.title()
963
- )
964
- self.df_regional["Model"] = self.df_regional["Model"].str.title()
989
+ if not self.df_regional.empty:
990
+ self.df_regional["Country"] = (
991
+ self.df_regional["Country"].str.replace("_", " ").str.title()
992
+ )
993
+ self.df_regional["Model"] = self.df_regional["Model"].str.title()
965
994
 
966
995
  def plot_heatmap(self):
967
996
  """Generate heatmaps of MAPE bins vs. % total area bins."""
@@ -1056,21 +1085,21 @@ class RegionalMapper(Geoanalysis):
1056
1085
  plt.minorticks_on()
1057
1086
  plt.xlabel("Mean Absolute Percentage Error (%)")
1058
1087
  plt.ylabel("Frequency")
1059
- plt.legend(title="Country", title_fontsize="13")
1088
+ plt.legend(title="Country", title_fontsize="16")
1060
1089
 
1061
1090
  # Adding the title at the top-right corner
1062
- plt.text(
1063
- 0.95, 0.95, # Coordinates in axes fraction
1064
- f"Model: {model}",
1065
- transform=plt.gca().transAxes,
1066
- fontsize=14,
1067
- verticalalignment="top",
1068
- horizontalalignment="right",
1069
- bbox=dict(facecolor="white", alpha=0.6, edgecolor="none")
1070
- )
1091
+ # plt.text(
1092
+ # 0.95, 0.95, # Coordinates in axes fraction
1093
+ # f"Model: {model}",
1094
+ # transform=plt.gca().transAxes,
1095
+ # fontsize=14,
1096
+ # verticalalignment="top",
1097
+ # horizontalalignment="right",
1098
+ # bbox=dict(facecolor="white", alpha=0.6, edgecolor="none")
1099
+ # )
1071
1100
 
1072
1101
  plt.tight_layout()
1073
- plt.savefig(self.dir_analysis / f"mape_histogram_{model}.png", dpi=250)
1102
+ plt.savefig(self.dir_analysis / f"histogram_region_{model}_mape.png", dpi=250)
1074
1103
  plt.close()
1075
1104
 
1076
1105
  def plot_mape_map(self):
@@ -1101,7 +1130,7 @@ class RegionalMapper(Geoanalysis):
1101
1130
  df = df_model[df_model["Country"].isin(countries)]
1102
1131
  self.dg = self.dg[self.dg["ADM0_NAME"].isin(countries)]
1103
1132
 
1104
- fname = f"mape_{crop}_{df_model['Model'].iloc[0]}.png"
1133
+ fname = f"map_{crop}_{df_model['Model'].iloc[0]}_mape.png"
1105
1134
  plot.plot_df_shpfile(
1106
1135
  self.dg,
1107
1136
  df,
@@ -1116,7 +1145,7 @@ class RegionalMapper(Geoanalysis):
1116
1145
  cmap=pal.scientific.sequential.Bamako_20_r,
1117
1146
  series="sequential",
1118
1147
  show_bg=False,
1119
- annotate_regions=True,
1148
+ annotate_regions=self.annotate_regions,
1120
1149
  loc_legend="lower left",
1121
1150
  )
1122
1151
 
@@ -1145,7 +1174,7 @@ class RegionalMapper(Geoanalysis):
1145
1174
  plt.xticks(rotation=0)
1146
1175
 
1147
1176
  plt.tight_layout()
1148
- plt.savefig(self.dir_analysis / "mape_by_year.png", dpi=250)
1177
+ plt.savefig(self.dir_analysis / "bar_mape_by_year.png", dpi=250)
1149
1178
  plt.close()
1150
1179
 
1151
1180
 
@@ -582,11 +582,10 @@ class Geocif:
582
582
  }
583
583
  )
584
584
 
585
- if self.median_yield_as_feature:
586
- # Add median yield to dataframe
587
- df.loc[:, f"Median {self.target}"] = np.around(
588
- df_region[f"Median {self.target}"].values, 3
589
- )
585
+ # Add median yield to dataframe
586
+ df.loc[:, f"Median {self.target}"] = np.around(
587
+ df_region[f"Median {self.target}"].values, 3
588
+ )
590
589
 
591
590
  if self.estimate_ci:
592
591
  if self.estimate_ci_for_all or self.forecast_season == self.today_year:
@@ -1142,6 +1141,9 @@ class Geocif:
1142
1141
  group_by = ["Region"]
1143
1142
  groups = self.df_train.groupby(group_by)
1144
1143
  for key, group in groups:
1144
+ if group.empty:
1145
+ continue
1146
+
1145
1147
  if self.check_yield_trend:
1146
1148
  if group[self.target].any():
1147
1149
  detrended_data = trend.detrend_dataframe(
@@ -361,10 +361,11 @@ def classify_target(df, target_col, number_classes):
361
361
  new_target_col = f"{target_col}_class"
362
362
 
363
363
  # Change the target column to categorical with the specified number of classes
364
- try:
365
- df[new_target_col], bins = pd.qcut(df[target_col], q=number_classes, labels=False, retbins=True, duplicates='drop')
366
- except:
367
- breakpoint()
364
+ df[new_target_col], bins = pd.qcut(df[target_col],
365
+ q=number_classes,
366
+ labels=False,
367
+ retbins=True,
368
+ duplicates='drop')
368
369
 
369
370
  return df, new_target_col, bins
370
371
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.53
3
+ Version: 0.1.55
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.53",
53
+ version="0.1.55",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes