geocif 0.1.94__tar.gz → 0.1.96__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.94/geocif.egg-info → geocif-0.1.96}/PKG-INFO +1 -1
  2. {geocif-0.1.94 → geocif-0.1.96}/geocif/analysis.py +72 -9
  3. {geocif-0.1.94 → geocif-0.1.96}/geocif/geocif.py +2 -2
  4. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/feature_selection.py +23 -1
  5. {geocif-0.1.94 → geocif-0.1.96}/geocif/viz/plot.py +22 -8
  6. {geocif-0.1.94 → geocif-0.1.96/geocif.egg-info}/PKG-INFO +1 -1
  7. {geocif-0.1.94 → geocif-0.1.96}/setup.py +1 -1
  8. {geocif-0.1.94 → geocif-0.1.96}/LICENSE +0 -0
  9. {geocif-0.1.94 → geocif-0.1.96}/MANIFEST.in +0 -0
  10. {geocif-0.1.94 → geocif-0.1.96}/README.md +0 -0
  11. {geocif-0.1.94 → geocif-0.1.96}/geocif/__init__.py +0 -0
  12. {geocif-0.1.94 → geocif-0.1.96}/geocif/agmet/__init__.py +0 -0
  13. {geocif-0.1.94 → geocif-0.1.96}/geocif/agmet/geoagmet.py +0 -0
  14. {geocif-0.1.94 → geocif-0.1.96}/geocif/agmet/plot.py +0 -0
  15. {geocif-0.1.94 → geocif-0.1.96}/geocif/agmet/utils.py +0 -0
  16. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/constants.py +0 -0
  18. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/features.py +0 -0
  19. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/geo.py +0 -0
  20. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.1.94 → geocif-0.1.96}/geocif/backup/models.py +0 -0
  23. {geocif-0.1.94 → geocif-0.1.96}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.1.94 → geocif-0.1.96}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.1.94 → geocif-0.1.96}/geocif/cei/indices.py +0 -0
  26. {geocif-0.1.94 → geocif-0.1.96}/geocif/experiments.py +0 -0
  27. {geocif-0.1.94 → geocif-0.1.96}/geocif/geocif_runner.py +0 -0
  28. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner.py +0 -0
  29. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_angola.py +0 -0
  30. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_madagascar.py +0 -0
  31. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_malawi.py +0 -0
  32. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_mozambique.py +0 -0
  33. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_south_africa.py +0 -0
  34. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_zambia.py +0 -0
  35. {geocif-0.1.94 → geocif-0.1.96}/geocif/indices_runner_zimbabwe.py +0 -0
  36. {geocif-0.1.94 → geocif-0.1.96}/geocif/logger.py +0 -0
  37. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/embedding.py +0 -0
  40. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/outliers.py +0 -0
  42. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/outlook.py +0 -0
  43. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/output.py +0 -0
  44. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/spatial_autocorrelation.py +0 -0
  45. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/stages.py +0 -0
  46. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/stats.py +0 -0
  47. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/trainers.py +0 -0
  48. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/trend.py +0 -0
  49. {geocif-0.1.94 → geocif-0.1.96}/geocif/ml/xai.py +0 -0
  50. {geocif-0.1.94 → geocif-0.1.96}/geocif/mm.py +0 -0
  51. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/__init__.py +0 -0
  52. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/aa.py +0 -0
  53. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/area.py +0 -0
  54. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/automl.py +0 -0
  55. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/download_esi.py +0 -0
  56. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/enso.py +0 -0
  57. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/eval.py +0 -0
  58. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/gamtest.py +0 -0
  59. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/gee_access.py +0 -0
  60. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/misc.py +0 -0
  61. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/play_xagg.py +0 -0
  62. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/reg.py +0 -0
  63. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.94 → geocif-0.1.96}/geocif/playground/wolayita_maize_mask.py +0 -0
  71. {geocif-0.1.94 → geocif-0.1.96}/geocif/risk/__init__.py +0 -0
  72. {geocif-0.1.94 → geocif-0.1.96}/geocif/risk/impact_assessment.py +0 -0
  73. {geocif-0.1.94 → geocif-0.1.96}/geocif/utils.py +0 -0
  74. {geocif-0.1.94 → geocif-0.1.96}/geocif/viz/__init__.py +0 -0
  75. {geocif-0.1.94 → geocif-0.1.96}/geocif/viz/gt.py +0 -0
  76. {geocif-0.1.94 → geocif-0.1.96}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.1.94 → geocif-0.1.96}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.94 → geocif-0.1.96}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.94 → geocif-0.1.96}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.94 → geocif-0.1.96}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.94 → geocif-0.1.96}/requirements.txt +0 -0
  82. {geocif-0.1.94 → geocif-0.1.96}/setup.cfg +0 -0
  83. {geocif-0.1.94 → geocif-0.1.96}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.94
3
+ Version: 0.1.96
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -627,7 +627,7 @@ class Geoanalysis:
627
627
  self.df_analysis["Anomaly"] = (
628
628
  self.df_analysis[self.predicted]
629
629
  * 100.0
630
- / self.df_analysis["Median Yield (tn per ha)"]
630
+ / self.df_analysis["Median Yield (tn per ha) (2018-2022)_y"]
631
631
  )
632
632
 
633
633
  # Compute the yield from the last year
@@ -782,6 +782,73 @@ class Geoanalysis:
782
782
  loc_legend="lower left",
783
783
  )
784
784
 
785
+ # Make map of predicted yield by country
786
+ for country in countries:
787
+ df_country = df_model[df_model["Country"] == country.lower().replace(" ", "_")]
788
+ fname = f"map_perc_area_{self.country}_{self.crop}.png"
789
+ col = "% of total Area (ha)"
790
+ plot.plot_df_shpfile(
791
+ self.dg, # dataframe containing adm1 name and polygon
792
+ df_country, # dataframe containing information that will be mapped
793
+ merge_col="Country Region", # Column on which to merge
794
+ name_country=[country], # Plot global map
795
+ name_col=col, # Which column to plot
796
+ dir_out=self.dir_plot / str(year), # Output directory
797
+ fname=fname, # Output file name
798
+ label=f"% of Total Area (ha)\n{self.crop.title()}",
799
+ vmin=df_country[col].min(),
800
+ vmax=df_country[col].max(),
801
+ cmap=pal.scientific.sequential.Bamako_20_r,
802
+ series="sequential",
803
+ show_bg=False,
804
+ annotate_regions=self.annotate_regions,
805
+ annotate_region_column=annotate_region_column,
806
+ loc_legend="lower left",
807
+ )
808
+
809
+ df_country = df_harvest_year[df_harvest_year["Country"] == country.lower().replace(" ", "_")]
810
+ fname = f"map_predicted_yield_{country}_{self.crop}_{time_period}_{year}.png"
811
+ plot.plot_df_shpfile(
812
+ self.dg, # dataframe containing adm1 name and polygon
813
+ df_country, # dataframe containing information that will be mapped
814
+ merge_col="Country Region", # Column on which to merge
815
+ name_country=[country], # Plot global map
816
+ name_col="Predicted Yield (tn per ha)", # Which column to plot
817
+ dir_out=self.dir_plot / str(year), # Output directory
818
+ fname=fname, # Output file name
819
+ label=f"Predicted Yield (Mg/ha)\n{self.crop.title()}, {year}",
820
+ vmin=df_country[self.predicted].min(),
821
+ vmax=df_country[self.predicted].max(),
822
+ cmap=pal.scientific.sequential.Bamako_20_r,
823
+ series="sequential",
824
+ show_bg=False,
825
+ annotate_regions=self.annotate_regions,
826
+ annotate_region_column=annotate_region_column,
827
+ loc_legend="lower left",
828
+ )
829
+
830
+ fname = (
831
+ f"map_anomaly_{country}_{self.crop}_{time_period}_{year}.png"
832
+ )
833
+ plot.plot_df_shpfile(
834
+ self.dg, # dataframe containing adm1 name and polygon
835
+ df_country, # dataframe containing information that will be mapped
836
+ merge_col="Country Region", # Column on which to merge
837
+ name_country=[country], # Plot global map
838
+ name_col="Anomaly", # Which column to plot
839
+ dir_out=self.dir_plot / str(year), # Output directory
840
+ fname=fname, # Output file name
841
+ label=f"% of {self.number_lag_years}-year Median Yield\n{self.crop.title()}, {year}",
842
+ vmin=df_country["Anomaly"].min(),
843
+ vmax=110, # df_harvest_year["Anomaly"].max(),
844
+ cmap=pal.cartocolors.diverging.Geyser_5_r,
845
+ series="sequential",
846
+ show_bg=False,
847
+ annotate_regions=self.annotate_regions,
848
+ annotate_region_column=annotate_region_column,
849
+ loc_legend="lower left",
850
+ )
851
+
785
852
  """ Ratio of Predicted to last Year Yield """
786
853
  # fname = f"{self.country}_{self.crop}_{time_period}_{year}_ratio_last_year_yield.png"
787
854
  # plot.plot_df_shpfile(
@@ -934,18 +1001,14 @@ class Geoanalysis:
934
1001
  self.annotate_regions = self.parser.getboolean(country, "annotate_regions")
935
1002
 
936
1003
  # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
937
- if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
1004
+ if "ADMIN0" not in self.dg.columns and "ADM0_NAME" not in self.dg.columns:
938
1005
  self.dg.loc[:, "ADMIN0"] = country.title().replace("_", " ")
939
- # if ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADM1_NAME
940
- if "ADMIN1" or "ADM1_NAME" not in self.dg.columns:
1006
+
1007
+ # If ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADMIN1
1008
+ if "ADMIN1" not in self.dg.columns and "ADM1_NAME" not in self.dg.columns:
941
1009
  if admin_zone == "admin_1":
942
1010
  self.dg.rename(columns={self.admin_col_name: "ADMIN1"}, inplace=True)
943
1011
 
944
- # Hack rename Tanzania to United Republic of Tanzania
945
- self.dg["ADMIN0"] = self.dg["ADMIN0"].replace(
946
- "Tanzania", "United Republic of Tanzania"
947
- )
948
-
949
1012
  # Rename ADMIN0 to ADM0_NAME and ADMIN1 to ADM1_NAME and ADMIN2 to ADM2_NAME
950
1013
  self.dg = self.dg.rename(
951
1014
  columns={
@@ -1005,6 +1005,7 @@ class Geocif:
1005
1005
  # and will confuse the model
1006
1006
  if self.forecast_season == self.today_year:
1007
1007
  current_month = ar.utcnow().month
1008
+ current_day = ar.utcnow().day
1008
1009
 
1009
1010
  # Identify columns where the second chunk equals the current month index
1010
1011
  cols_to_drop = []
@@ -1014,11 +1015,10 @@ class Geocif:
1014
1015
  "Starting Stage"
1015
1016
  ]
1016
1017
 
1017
- if mon == current_month:
1018
+ if mon == current_month and current_day > 25:
1018
1019
  cols_to_drop.append(col)
1019
1020
 
1020
1021
  # Drop those columns
1021
-
1022
1022
  df = df.drop(columns=cols_to_drop)
1023
1023
 
1024
1024
  # Change column name
@@ -133,6 +133,27 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
133
133
  selected_features = (
134
134
  shap_importance_df["feature"].head(optimal_N).values.tolist()
135
135
  )
136
+ elif method == "stabl":
137
+ from stabl.stabl import Stabl
138
+ from sklearn.linear_model import LogisticRegression
139
+
140
+ lasso = LogisticRegression(
141
+ penalty="l1", class_weight="balanced", max_iter=int(1e6), solver="liblinear", random_state=42
142
+ )
143
+ stabl = Stabl(
144
+ base_estimator=lasso,
145
+ n_bootstraps=100,
146
+ artificial_type="knockoff",
147
+ artificial_proportion=.5,
148
+ replace=False,
149
+ fdr_threshold_range=np.arange(0.1, 1, 0.01),
150
+ sample_fraction=0.5,
151
+ random_state=42,
152
+ lambda_grid={"C": np.linspace(0.004, 0.4, 30)},
153
+ verbose=1
154
+ )
155
+ stabl.fit(X, y)
156
+ selected_features = stabl.get_feature_names_out()
136
157
  elif method == "feature_engine":
137
158
  from feature_engine.selection import SmartCorrelatedSelection
138
159
 
@@ -304,7 +325,8 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
304
325
  else:
305
326
  raise ValueError("Method not recognized. Use BorutaPy, Genetic, or RFE")
306
327
  # tentative_features = X.columns[selector.support_weak_].tolist()
307
-
328
+ print(selected_features)
329
+ breakpoint()
308
330
  non_eo = are_all_features_non_eo(selected_features)
309
331
  if non_eo or method == "SelectKBest":
310
332
  from sklearn.feature_selection import SelectKBest, f_regression
@@ -375,6 +375,11 @@ def plot_df_shpfile(
375
375
  )
376
376
 
377
377
  df_country = gpd.read_file(shpfilename, engine="pyogrio")
378
+ # Hack
379
+ # Rename Russia to Russian Federation, in the ADMIN column
380
+ df_country.loc[
381
+ df_country["ADMIN"].str.lower() == "russia", "ADMIN"
382
+ ] = "Russian Federation"
378
383
  # read the country borders
379
384
  _name_country = []
380
385
  for cntr in name_country:
@@ -390,14 +395,23 @@ def plot_df_shpfile(
390
395
  )
391
396
  _name_country.append(cntr.replace(" ", "_").lower())
392
397
 
393
- extent = rgeo.get_country_lat_lon_extent(
394
- _name_country, buffer=1.0
395
- ) # left, right, bottom, top
396
- # Hack: Add space to the top for adding title
397
- extent[3] = extent[3] + 2
398
- # Add some space to the bottom for adding legend and colorbar
399
- extent[2] = extent[2] - 3
400
- ax.set_extent(extent)
398
+ # Hack
399
+ if _name_country[0] == "russian_federation":
400
+ extent = [20, 80, 40, 80]
401
+ else:
402
+ extent = rgeo.get_country_lat_lon_extent(
403
+ _name_country, buffer=1.0
404
+ ) # left, right, bottom, top
405
+
406
+ # Hack: Add space to the top for adding title
407
+ extent[3] = extent[3] + 2
408
+ # Add some space to the bottom for adding legend and colorbar
409
+ extent[2] = extent[2] - 3
410
+
411
+ try:
412
+ ax.set_extent(extent)
413
+ except:
414
+ breakpoint()
401
415
  elif name_country == "world":
402
416
  ax.add_feature(
403
417
  cartopy.feature.LAND.with_scale("50m"), color="white"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.94
3
+ Version: 0.1.96
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.94",
53
+ version="0.1.96",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes