geocif 0.1.93__tar.gz → 0.1.95__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.93/geocif.egg-info → geocif-0.1.95}/PKG-INFO +1 -1
  2. {geocif-0.1.93 → geocif-0.1.95}/geocif/analysis.py +72 -9
  3. {geocif-0.1.93 → geocif-0.1.95}/geocif/geocif.py +17 -33
  4. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/embedding.py +1 -1
  5. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/stages.py +0 -5
  6. {geocif-0.1.93 → geocif-0.1.95}/geocif/viz/plot.py +22 -8
  7. {geocif-0.1.93 → geocif-0.1.95/geocif.egg-info}/PKG-INFO +1 -1
  8. {geocif-0.1.93 → geocif-0.1.95}/setup.py +1 -1
  9. {geocif-0.1.93 → geocif-0.1.95}/LICENSE +0 -0
  10. {geocif-0.1.93 → geocif-0.1.95}/MANIFEST.in +0 -0
  11. {geocif-0.1.93 → geocif-0.1.95}/README.md +0 -0
  12. {geocif-0.1.93 → geocif-0.1.95}/geocif/__init__.py +0 -0
  13. {geocif-0.1.93 → geocif-0.1.95}/geocif/agmet/__init__.py +0 -0
  14. {geocif-0.1.93 → geocif-0.1.95}/geocif/agmet/geoagmet.py +0 -0
  15. {geocif-0.1.93 → geocif-0.1.95}/geocif/agmet/plot.py +0 -0
  16. {geocif-0.1.93 → geocif-0.1.95}/geocif/agmet/utils.py +0 -0
  17. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/__init__.py +0 -0
  18. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/constants.py +0 -0
  19. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/features.py +0 -0
  20. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/geo.py +0 -0
  21. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/geocif.py +0 -0
  22. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/metadata.py +0 -0
  23. {geocif-0.1.93 → geocif-0.1.95}/geocif/backup/models.py +0 -0
  24. {geocif-0.1.93 → geocif-0.1.95}/geocif/cei/__init__.py +0 -0
  25. {geocif-0.1.93 → geocif-0.1.95}/geocif/cei/definitions.py +0 -0
  26. {geocif-0.1.93 → geocif-0.1.95}/geocif/cei/indices.py +0 -0
  27. {geocif-0.1.93 → geocif-0.1.95}/geocif/experiments.py +0 -0
  28. {geocif-0.1.93 → geocif-0.1.95}/geocif/geocif_runner.py +0 -0
  29. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner.py +0 -0
  30. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_angola.py +0 -0
  31. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_madagascar.py +0 -0
  32. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_malawi.py +0 -0
  33. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_mozambique.py +0 -0
  34. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_south_africa.py +0 -0
  35. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_zambia.py +0 -0
  36. {geocif-0.1.93 → geocif-0.1.95}/geocif/indices_runner_zimbabwe.py +0 -0
  37. {geocif-0.1.93 → geocif-0.1.95}/geocif/logger.py +0 -0
  38. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/__init__.py +0 -0
  39. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/correlations.py +0 -0
  40. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/feature_selection.py +0 -0
  42. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/outliers.py +0 -0
  43. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/outlook.py +0 -0
  44. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/output.py +0 -0
  45. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/spatial_autocorrelation.py +0 -0
  46. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/stats.py +0 -0
  47. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/trainers.py +0 -0
  48. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/trend.py +0 -0
  49. {geocif-0.1.93 → geocif-0.1.95}/geocif/ml/xai.py +0 -0
  50. {geocif-0.1.93 → geocif-0.1.95}/geocif/mm.py +0 -0
  51. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/__init__.py +0 -0
  52. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/aa.py +0 -0
  53. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/area.py +0 -0
  54. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/automl.py +0 -0
  55. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/download_esi.py +0 -0
  56. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/enso.py +0 -0
  57. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/eval.py +0 -0
  58. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/gamtest.py +0 -0
  59. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/gee_access.py +0 -0
  60. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/misc.py +0 -0
  61. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/play_xagg.py +0 -0
  62. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/reg.py +0 -0
  63. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.93 → geocif-0.1.95}/geocif/playground/wolayita_maize_mask.py +0 -0
  71. {geocif-0.1.93 → geocif-0.1.95}/geocif/risk/__init__.py +0 -0
  72. {geocif-0.1.93 → geocif-0.1.95}/geocif/risk/impact_assessment.py +0 -0
  73. {geocif-0.1.93 → geocif-0.1.95}/geocif/utils.py +0 -0
  74. {geocif-0.1.93 → geocif-0.1.95}/geocif/viz/__init__.py +0 -0
  75. {geocif-0.1.93 → geocif-0.1.95}/geocif/viz/gt.py +0 -0
  76. {geocif-0.1.93 → geocif-0.1.95}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.1.93 → geocif-0.1.95}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.93 → geocif-0.1.95}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.93 → geocif-0.1.95}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.93 → geocif-0.1.95}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.93 → geocif-0.1.95}/requirements.txt +0 -0
  82. {geocif-0.1.93 → geocif-0.1.95}/setup.cfg +0 -0
  83. {geocif-0.1.93 → geocif-0.1.95}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.93
3
+ Version: 0.1.95
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -627,7 +627,7 @@ class Geoanalysis:
627
627
  self.df_analysis["Anomaly"] = (
628
628
  self.df_analysis[self.predicted]
629
629
  * 100.0
630
- / self.df_analysis["Median Yield (tn per ha)"]
630
+ / self.df_analysis["Median Yield (tn per ha) (2018-2022)_y"]
631
631
  )
632
632
 
633
633
  # Compute the yield from the last year
@@ -782,6 +782,73 @@ class Geoanalysis:
782
782
  loc_legend="lower left",
783
783
  )
784
784
 
785
+ # Make map of predicted yield by country
786
+ for country in countries:
787
+ df_country = df_model[df_model["Country"] == country.lower().replace(" ", "_")]
788
+ fname = f"map_perc_area_{self.country}_{self.crop}.png"
789
+ col = "% of total Area (ha)"
790
+ plot.plot_df_shpfile(
791
+ self.dg, # dataframe containing adm1 name and polygon
792
+ df_country, # dataframe containing information that will be mapped
793
+ merge_col="Country Region", # Column on which to merge
794
+ name_country=[country], # Plot global map
795
+ name_col=col, # Which column to plot
796
+ dir_out=self.dir_plot / str(year), # Output directory
797
+ fname=fname, # Output file name
798
+ label=f"% of Total Area (ha)\n{self.crop.title()}",
799
+ vmin=df_country[col].min(),
800
+ vmax=df_country[col].max(),
801
+ cmap=pal.scientific.sequential.Bamako_20_r,
802
+ series="sequential",
803
+ show_bg=False,
804
+ annotate_regions=self.annotate_regions,
805
+ annotate_region_column=annotate_region_column,
806
+ loc_legend="lower left",
807
+ )
808
+
809
+ df_country = df_harvest_year[df_harvest_year["Country"] == country.lower().replace(" ", "_")]
810
+ fname = f"map_predicted_yield_{country}_{self.crop}_{time_period}_{year}.png"
811
+ plot.plot_df_shpfile(
812
+ self.dg, # dataframe containing adm1 name and polygon
813
+ df_country, # dataframe containing information that will be mapped
814
+ merge_col="Country Region", # Column on which to merge
815
+ name_country=[country], # Plot global map
816
+ name_col="Predicted Yield (tn per ha)", # Which column to plot
817
+ dir_out=self.dir_plot / str(year), # Output directory
818
+ fname=fname, # Output file name
819
+ label=f"Predicted Yield (Mg/ha)\n{self.crop.title()}, {year}",
820
+ vmin=df_country[self.predicted].min(),
821
+ vmax=df_country[self.predicted].max(),
822
+ cmap=pal.scientific.sequential.Bamako_20_r,
823
+ series="sequential",
824
+ show_bg=False,
825
+ annotate_regions=self.annotate_regions,
826
+ annotate_region_column=annotate_region_column,
827
+ loc_legend="lower left",
828
+ )
829
+
830
+ fname = (
831
+ f"map_anomaly_{country}_{self.crop}_{time_period}_{year}.png"
832
+ )
833
+ plot.plot_df_shpfile(
834
+ self.dg, # dataframe containing adm1 name and polygon
835
+ df_country, # dataframe containing information that will be mapped
836
+ merge_col="Country Region", # Column on which to merge
837
+ name_country=[country], # Plot global map
838
+ name_col="Anomaly", # Which column to plot
839
+ dir_out=self.dir_plot / str(year), # Output directory
840
+ fname=fname, # Output file name
841
+ label=f"% of {self.number_lag_years}-year Median Yield\n{self.crop.title()}, {year}",
842
+ vmin=df_country["Anomaly"].min(),
843
+ vmax=110, # df_harvest_year["Anomaly"].max(),
844
+ cmap=pal.cartocolors.diverging.Geyser_5_r,
845
+ series="sequential",
846
+ show_bg=False,
847
+ annotate_regions=self.annotate_regions,
848
+ annotate_region_column=annotate_region_column,
849
+ loc_legend="lower left",
850
+ )
851
+
785
852
  """ Ratio of Predicted to last Year Yield """
786
853
  # fname = f"{self.country}_{self.crop}_{time_period}_{year}_ratio_last_year_yield.png"
787
854
  # plot.plot_df_shpfile(
@@ -934,18 +1001,14 @@ class Geoanalysis:
934
1001
  self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
935
1002
 
936
1003
  # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
937
- if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
1004
+ if "ADMIN0" not in self.dg.columns and "ADM0_NAME" not in self.dg.columns:
938
1005
  self.dg.loc[:, "ADMIN0"] = country.title().replace("_", " ")
939
- # if ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADM1_NAME
940
- if "ADMIN1" or "ADM1_NAME" not in self.dg.columns:
1006
+
1007
+ # If ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADMIN1
1008
+ if "ADMIN1" not in self.dg.columns and "ADM1_NAME" not in self.dg.columns:
941
1009
  if admin_zone == "admin_1":
942
1010
  self.dg.rename(columns={self.admin_col_name: "ADMIN1"}, inplace=True)
943
1011
 
944
- # Hack rename Tanzania to United Republic of Tanzania
945
- self.dg["ADMIN0"] = self.dg["ADMIN0"].replace(
946
- "Tanzania", "United Republic of Tanzania"
947
- )
948
-
949
1012
  # Rename ADMIN0 to ADM0_NAME and ADMIN1 to ADM1_NAME and ADMIN2 to ADM2_NAME
950
1013
  self.dg = self.dg.rename(
951
1014
  columns={
@@ -1005,6 +1005,7 @@ class Geocif:
1005
1005
  # and will confuse the model
1006
1006
  if self.forecast_season == self.today_year:
1007
1007
  current_month = ar.utcnow().month
1008
+ current_day = ar.utcnow().day
1008
1009
 
1009
1010
  # Identify columns where the second chunk equals the current month index
1010
1011
  cols_to_drop = []
@@ -1014,51 +1015,30 @@ class Geocif:
1014
1015
  "Starting Stage"
1015
1016
  ]
1016
1017
 
1017
- if mon == current_month:
1018
+ if mon == current_month and current_day > 25:
1018
1019
  cols_to_drop.append(col)
1019
1020
 
1020
1021
  # Drop those columns
1021
-
1022
1022
  df = df.drop(columns=cols_to_drop)
1023
- from collections import Counter
1024
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1025
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1026
- print("<0>", dupes)
1027
- # Hack: If
1023
+
1028
1024
  # Change column name
1029
1025
  # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
1030
1026
  df = stages.update_feature_names(df, self.method)
1031
- from collections import Counter
1032
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1033
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1034
- print("<111>", dupes)
1027
+
1035
1028
  all_cei_columns = self.get_cei_column_names(df)
1036
1029
  # Fill in any missing values with 0
1037
1030
  df.loc[:, all_cei_columns].fillna(0, inplace=True)
1038
- from collections import Counter
1039
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1040
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1041
- print("<1>", dupes)
1042
1031
 
1043
1032
  df = fe.compute_last_year_yield(df, self.target)
1044
- from collections import Counter
1045
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1046
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1047
- print("<2>", dupes)
1033
+
1048
1034
  df = fe.compute_median_statistics(
1049
1035
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1050
1036
  )
1051
- from collections import Counter
1052
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1053
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1054
- print("<3>", dupes)
1037
+
1055
1038
  df = fe.compute_user_median_statistics(df, range(2018, 2023))
1056
1039
 
1057
1040
  df = fe.compute_user_median_statistics(df, range(2013, 2018))
1058
- from collections import Counter
1059
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1060
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1061
- print("<4>", dupes)
1041
+
1062
1042
  if self.median_area_as_feature:
1063
1043
  df = fe.compute_median_statistics(
1064
1044
  df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1073,10 +1053,7 @@ class Geocif:
1073
1053
  df = fe.compute_analogous_yield(
1074
1054
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1075
1055
  )
1076
- from collections import Counter
1077
- esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1078
- dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1079
- print("5", dupes)
1056
+
1080
1057
  # Create Region_ID column based on Region column category code
1081
1058
  df["Region"] = df["Region"].astype("category")
1082
1059
  if self.cluster_strategy == "single":
@@ -1090,7 +1067,7 @@ class Geocif:
1090
1067
 
1091
1068
  # Region_ID should be type category
1092
1069
  df["Region_ID"] = df["Region_ID"].astype("category")
1093
- breakpoint()
1070
+
1094
1071
  return df
1095
1072
 
1096
1073
  def execute(self):
@@ -1147,7 +1124,14 @@ class Geocif:
1147
1124
 
1148
1125
  # Join with dg based on Country Region column, only keeping rows that are in df
1149
1126
  # Only use geometry column from self.dg
1150
- self.dg_country = self.dg_country[["Country Region", "geometry"]].merge(
1127
+ if self.admin_zone == "admin_1":
1128
+ cols = ["Country Region", "geometry", "ADM1_NAME"]
1129
+ elif self.admin_zone == "admin_2":
1130
+ cols = ["Country Region", "geometry", "ADM2_NAME"]
1131
+ else:
1132
+ raise ValueError(f"Unsopported {self.admin_zone}")
1133
+
1134
+ self.dg_country = self.dg_country[cols].merge(
1151
1135
  df[["Country Region", self.correlation_plot_groupby]],
1152
1136
  on="Country Region",
1153
1137
  how="outer",
@@ -47,7 +47,7 @@ def _compute_correlations(X, y):
47
47
  r = pearsonr(y_filtered, f_series_filtered)[0]
48
48
  feature_correlations[feature] = round(r, 3)
49
49
  except Exception as e:
50
- print(f"Error computing correlation for {feature}: {e}")
50
+ # print(f"Error computing correlation for {feature}: {e}")
51
51
  feature_correlations[feature] = np.nan
52
52
 
53
53
  return feature_correlations
@@ -268,11 +268,6 @@ def update_feature_names(df, method):
268
268
  # Saving the result in the dictionary
269
269
  stages_info[element] = (cei, start_stage, end_stage, new_column_name)
270
270
 
271
- # Check if any duplicates exist in the dictionary
272
- if len(stages_info) != len(set(stages_info.values())):
273
- breakpoint()
274
- raise ValueError(f"Duplicate stage information found for {element}")
275
- breakpoint()
276
271
  # For each column in df, check if it exists in stages_info, and
277
272
  # replace it with the new column name
278
273
  # Precompute the rename mapping outside the loop
@@ -375,6 +375,11 @@ def plot_df_shpfile(
375
375
  )
376
376
 
377
377
  df_country = gpd.read_file(shpfilename, engine="pyogrio")
378
+ # Hack
379
+ # Rename Russia to Russian Federation, in the ADMIN column
380
+ df_country.loc[
381
+ df_country["ADMIN"].str.lower() == "russia", "ADMIN"
382
+ ] = "Russian Federation"
378
383
  # read the country borders
379
384
  _name_country = []
380
385
  for cntr in name_country:
@@ -390,14 +395,23 @@ def plot_df_shpfile(
390
395
  )
391
396
  _name_country.append(cntr.replace(" ", "_").lower())
392
397
 
393
- extent = rgeo.get_country_lat_lon_extent(
394
- _name_country, buffer=1.0
395
- ) # left, right, bottom, top
396
- # Hack: Add space to the top for adding title
397
- extent[3] = extent[3] + 2
398
- # Add some space to the bottom for adding legend and colorbar
399
- extent[2] = extent[2] - 3
400
- ax.set_extent(extent)
398
+ # Hack
399
+ if _name_country[0] == "russian_federation":
400
+ extent = [20, 80, 40, 80]
401
+ else:
402
+ extent = rgeo.get_country_lat_lon_extent(
403
+ _name_country, buffer=1.0
404
+ ) # left, right, bottom, top
405
+
406
+ # Hack: Add space to the top for adding title
407
+ extent[3] = extent[3] + 2
408
+ # Add some space to the bottom for adding legend and colorbar
409
+ extent[2] = extent[2] - 3
410
+
411
+ try:
412
+ ax.set_extent(extent)
413
+ except:
414
+ breakpoint()
401
415
  elif name_country == "world":
402
416
  ax.add_feature(
403
417
  cartopy.feature.LAND.with_scale("50m"), color="white"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.93
3
+ Version: 0.1.95
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.93",
53
+ version="0.1.95",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes