geocif 0.1.30.tar.gz → 0.1.31.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {geocif-0.1.30/geocif.egg-info → geocif-0.1.31}/PKG-INFO +1 -1
  2. {geocif-0.1.30 → geocif-0.1.31}/geocif/geocif.py +7 -4
  3. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/spatial_autocorrelation.py +46 -27
  4. {geocif-0.1.30 → geocif-0.1.31/geocif.egg-info}/PKG-INFO +1 -1
  5. {geocif-0.1.30 → geocif-0.1.31}/setup.py +1 -1
  6. {geocif-0.1.30 → geocif-0.1.31}/LICENSE +0 -0
  7. {geocif-0.1.30 → geocif-0.1.31}/MANIFEST.in +0 -0
  8. {geocif-0.1.30 → geocif-0.1.31}/README.md +0 -0
  9. {geocif-0.1.30 → geocif-0.1.31}/geocif/__init__.py +0 -0
  10. {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/__init__.py +0 -0
  11. {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/geoagmet.py +0 -0
  12. {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/plot.py +0 -0
  13. {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/utils.py +0 -0
  14. {geocif-0.1.30 → geocif-0.1.31}/geocif/analysis.py +0 -0
  15. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/__init__.py +0 -0
  16. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/constants.py +0 -0
  17. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/features.py +0 -0
  18. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/geo.py +0 -0
  19. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/geocif.py +0 -0
  20. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/metadata.py +0 -0
  21. {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/models.py +0 -0
  22. {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/__init__.py +0 -0
  23. {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/definitions.py +0 -0
  24. {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/indices.py +0 -0
  25. {geocif-0.1.30 → geocif-0.1.31}/geocif/indices_runner.py +0 -0
  26. {geocif-0.1.30 → geocif-0.1.31}/geocif/logger.py +0 -0
  27. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/__init__.py +0 -0
  28. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/correlations.py +0 -0
  29. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/embedding.py +0 -0
  30. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/feature_engineering.py +0 -0
  31. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/feature_selection.py +0 -0
  32. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/outliers.py +0 -0
  33. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/outlook.py +0 -0
  34. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/output.py +0 -0
  35. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/stages.py +0 -0
  36. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/stats.py +0 -0
  37. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/trainers.py +0 -0
  38. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/trend.py +0 -0
  39. {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/xai.py +0 -0
  40. {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/__init__.py +0 -0
  41. {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/automl.py +0 -0
  42. {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/misc.py +0 -0
  43. {geocif-0.1.30 → geocif-0.1.31}/geocif/utils.py +0 -0
  44. {geocif-0.1.30 → geocif-0.1.31}/geocif/viz/__init__.py +0 -0
  45. {geocif-0.1.30 → geocif-0.1.31}/geocif/viz/plot.py +0 -0
  46. {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/SOURCES.txt +0 -0
  47. {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/dependency_links.txt +0 -0
  48. {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/not-zip-safe +0 -0
  49. {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/top_level.txt +0 -0
  50. {geocif-0.1.30 → geocif-0.1.31}/requirements.txt +0 -0
  51. {geocif-0.1.30 → geocif-0.1.31}/setup.cfg +0 -0
  52. {geocif-0.1.30 → geocif-0.1.31}/tests/test_geocif.py +0 -0
File: PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.30
+Version: 0.1.31
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
File: geocif/geocif.py
@@ -355,6 +355,8 @@ class Geocif:
         experiment_id = f"{self.country}_{self.crop}"
         now = ar.utcnow().to("America/New_York").format("MMMM-DD-YYYY HH:mm:ss")
         selected_features = self.selected_features + self.cat_features
+        # Compute percentage difference between y_pred and y_test
+        ape = np.abs((y_pred - y_test) / y_test) * 100
         df = pd.DataFrame(
             {
                 "Experiment_ID": np.full(shp, experiment_id),
@@ -378,6 +380,7 @@ class Geocif:
                 "Area (ha)": df_region["Area (ha)"].values,
                 f"Observed {self.target}": np.around(y_test, 3).ravel(),
                 f"Predicted {self.target}": np.around(y_pred, 3).ravel(),
+                f"APE": np.around(ape, 3).ravel(),
             }
         )
 
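The two geocif.py hunks above add an absolute percentage error (APE) column to the per-region results table: the error is computed once from y_pred and y_test and then written alongside the observed and predicted yields. A minimal standalone sketch of the same calculation, using made-up arrays rather than the package's data:

    import numpy as np
    import pandas as pd

    # Hypothetical observed and predicted yields (tn per ha), for illustration only
    y_test = np.array([2.1, 3.4, 1.8])
    y_pred = np.array([2.0, 3.9, 1.7])

    # Same expression as in the diff; note it is undefined wherever y_test == 0
    ape = np.abs((y_pred - y_test) / y_test) * 100

    df = pd.DataFrame(
        {
            "Observed Yield (tn per ha)": np.around(y_test, 3),
            "Predicted Yield (tn per ha)": np.around(y_pred, 3),
            "APE": np.around(ape, 3),
        }
    )
    print(df)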
@@ -720,7 +723,7 @@ class Geocif:
         """ Convert this dataframe into an ML ready format and save to disk """
         df = self.create_ml_dataframe(df)
         dir_output = (
-            self.dir_analysis / self.country / self.crop / str(self.forecast_season)
+            self.dir_analysis / self.country / self.crop / self.model_name / str(self.forecast_season)
         )
         os.makedirs(dir_output, exist_ok=True)
         df.to_csv(
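The hunk above adds the model name as an extra level in the output path, so CSVs produced by different models land in separate directories instead of overwriting each other. A rough pathlib equivalent, with placeholder values standing in for the Geocif attributes:

    from pathlib import Path

    # Placeholder values; in geocif these come from self.dir_analysis, self.country,
    # self.crop, self.model_name and self.forecast_season
    dir_analysis = Path("analysis")
    country, crop, model_name, forecast_season = "malawi", "maize", "catboost", 2024

    dir_output = dir_analysis / country / crop / model_name / str(forecast_season)
    dir_output.mkdir(parents=True, exist_ok=True)  # same effect as os.makedirs(..., exist_ok=True)
    print(dir_output)  # analysis/malawi/maize/catboost/2024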
@@ -768,6 +771,9 @@ class Geocif:
         dict_kwargs["dg_country"] = self.dg_country
         dict_kwargs["combined_dict"] = self.combined_dict
 
+        if self.spatial_autocorrelation:
+            sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
+
         if self.correlation_plots:
             self.logger.info(f"Correlation plot for {self.country} {self.crop}")
             (
@@ -775,9 +781,6 @@ class Geocif:
                 dict_best_cei,
             ) = correlations.all_correlated_feature_by_time(df, **dict_kwargs)
 
-        if self.spatial_autocorrelation:
-            sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
-
         """ Separate into train and test datasets based on forecast_season """
         mask = df["Harvest Year"] == self.forecast_season
         self.df_train = df[~mask]
File: geocif/ml/spatial_autocorrelation.py
@@ -1,7 +1,11 @@
+import warnings
+
+from tqdm import tqdm
+import matplotlib.pyplot as plt
 import pandas as pd
 from pysal.lib import weights
-from pysal.explore import esda
-import matplotlib.pyplot as plt
+
+warnings.filterwarnings("ignore")
 
 
 def validate_inputs(df_results, required_columns):
@@ -40,24 +44,26 @@ def preprocess_data(df_results, dg_country):
     dg_country["Country Region"] = dg_country["Country Region"].str.lower()
     dg_country = dg_country[dg_country["Country Region"].isin(df["Country Region"])]
 
+    dg_country.reset_index(drop=True, inplace=True)
+
     merged_df = dg_country.merge(df, on="Country Region", how="inner")
 
-    return merged_df, dg_country
+    return merged_df
 
 
-def create_base_weights(dg_country):
+def create_base_weights(merged_df):
     """
 
     Args:
-        dg_country:
+        merged_df:
 
     Returns:
 
     """
-    dg_subset = dg_country[["Country Region", "geometry"]].drop_duplicates()
+    dg = merged_df[["Country Region", "geometry"]].drop_duplicates()
 
     try:
-        w_base = weights.Queen.from_dataframe(dg_subset)
+        w_base = weights.Queen.from_dataframe(dg)
     except Exception as e:
         raise RuntimeError(f"Failed to create spatial weights: {e}")
 
@@ -65,13 +71,10 @@ def create_base_weights(dg_country):
         index for index, neighbors in w_base.neighbors.items() if len(neighbors) == 0
     ]
     if no_neighbors:
-        print(f"Removing {len(no_neighbors)} polygons with 0 neighbors")
-        dg_country = dg_country.drop(index=no_neighbors).reset_index(drop=True)
-        w_base = weights.Queen.from_dataframe(
-            dg_country[["Country Region", "geometry"]]
-        )
+        dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
+        w_base = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
 
-    return w_base, dg_country
+    return w_base, dg
 
 
 def create_weights_for_year(dg_country, regions_with_data):
@@ -84,14 +87,22 @@ def create_weights_for_year(dg_country, regions_with_data):
     Returns:
 
     """
-    dg_subset = dg_country[dg_country["Country Region"].isin(regions_with_data)]
+    dg = dg_country[dg_country["Country Region"].isin(regions_with_data)]
+    dg = dg.reset_index(drop=True)
+
+    wt = weights.Queen.from_dataframe(dg)
 
-    wt = weights.Queen.from_dataframe(dg_subset)
+    no_neighbors = [
+        index for index, neighbors in wt.neighbors.items() if len(neighbors) == 0
+    ]
+    if no_neighbors:
+        dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
+        wt = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
 
-    return wt
+    return wt, dg
 
 
-def compute_morans_i(merged_df, dg_country):
+def compute_morans_i(merged_df):
     """
 
     Args:
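Both create_base_weights and create_weights_for_year now follow the same pattern: build Queen contiguity weights, look for polygons with zero neighbors, drop the first such island, and rebuild the weights. A self-contained sketch of that pattern on a toy GeoDataFrame (not the package's admin boundaries), assuming geopandas and shapely are available alongside pysal:

    import geopandas as gpd
    from shapely.geometry import box
    from pysal.lib import weights

    # Toy regions: two touching squares plus one far-away "island" polygon
    dg = gpd.GeoDataFrame(
        {"Country Region": ["a", "b", "c"]},
        geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1), box(10, 10, 11, 11)],
    )

    wt = weights.Queen.from_dataframe(dg)

    # Indices of polygons with no contiguity neighbors ("c" in this toy example)
    no_neighbors = [
        index for index, neighbors in wt.neighbors.items() if len(neighbors) == 0
    ]
    if no_neighbors:
        # Mirrors the new code: drop the first island and rebuild the weights
        dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
        wt = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])

    print(wt.n, no_neighbors)

Because only no_neighbors[0] is dropped, a single pass removes one island; if the data contain several disconnected polygons, the rebuilt weights can still have islands.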
@@ -101,25 +112,35 @@ def compute_morans_i(merged_df, dg_country):
     Returns:
 
     """
+    from pysal.explore import esda
+
+    # Drop any regions with missing data
+    merged_df = merged_df.dropna(subset=["Yield (tn per ha)"])
+
     years = merged_df["Harvest Year"].unique()
     results = {"Harvest Year": [], "Moran's I": [], "p-value": [], "Significant": []}
 
-    for year in years:
+    for year in tqdm(years, desc="Compute Moran's I"):
         year_data = merged_df[merged_df["Harvest Year"] == year]
         regions_with_data = year_data["Country Region"].unique()
         year_data = year_data[year_data["Country Region"].isin(regions_with_data)]
 
-        y = year_data[["Region", "Yield (tn per ha)"]].drop_duplicates()
+        y = year_data[["Country Region", "Region", "Yield (tn per ha)"]].drop_duplicates()
+        dg_country = year_data[["Country Region", "geometry"]].drop_duplicates()
 
         if len(y) > 1:
-            w = create_weights_for_year(dg_country, regions_with_data)
+            w, x = create_weights_for_year(dg_country, regions_with_data)
+            y = y[y["Country Region"].isin(x["Country Region"])]
 
             try:
                 mi = esda.Moran(y["Yield (tn per ha)"].values, w, permutations=999)
             except:
                 breakpoint()
             results["Harvest Year"].append(year)
-            results["Moran's I"].append(mi.I)
+            try:
+                results["Moran's I"].append(mi.I)
+            except:
+                breakpoint()
             results["p-value"].append(mi.p_sim)
             results["Significant"].append(mi.p_sim < 0.1)
         else:
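The reworked compute_morans_i runs esda.Moran on the de-duplicated per-region yields for each harvest year, with 999 permutations, and flags a year as significant when the pseudo p-value is below 0.1. A self-contained sketch of that core call on synthetic data rather than the crop yields used by geocif:

    import numpy as np
    import geopandas as gpd
    from shapely.geometry import box
    from pysal.lib import weights
    from pysal.explore import esda

    # 3 x 3 grid of square regions with a synthetic yield value per region
    polygons = [box(i, j, i + 1, j + 1) for i in range(3) for j in range(3)]
    values = np.random.default_rng(0).normal(3.0, 0.5, len(polygons))
    dg = gpd.GeoDataFrame({"Yield (tn per ha)": values}, geometry=polygons)

    w = weights.Queen.from_dataframe(dg)
    mi = esda.Moran(dg["Yield (tn per ha)"].values, w, permutations=999)

    # Moran's I statistic, permutation-based pseudo p-value, significance at 0.1
    print(round(mi.I, 3), mi.p_sim, mi.p_sim < 0.1)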
@@ -131,7 +152,7 @@ def compute_morans_i(merged_df, dg_country):
     return pd.DataFrame(results)
 
 
-def plot_moransi_time_series(results_df, country, crop, dir_output):
+def plot_morans_i_time_series(results_df, country, crop, dir_output):
     """
 
     Args:
@@ -194,12 +215,10 @@ def compute_spatial_autocorrelation(df_results, **kwargs):
     ]
     validate_inputs(df_results, required_columns)
 
-    merged_df, dg_country = preprocess_data(df_results, dg_country)
+    merged_df = preprocess_data(df_results, dg_country)
     if merged_df.empty:
         raise ValueError("No valid data available after preprocessing")
 
-    w_base, dg_country = create_base_weights(dg_country)
-
-    results_df = compute_morans_i(merged_df, dg_country)
+    results_df = compute_morans_i(merged_df)
 
-    plot_moransi_time_series(results_df, country, crop, dir_output)
+    plot_morans_i_time_series(results_df, country, crop, dir_output)
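After the refactor, compute_spatial_autocorrelation derives the per-year geometries itself, so the caller only supplies the results table and keyword arguments. A hedged usage sketch: the column names follow those referenced above (Country Region, Region, Harvest Year, Yield (tn per ha)), the file paths are hypothetical, and the country/crop/dir_output keyword names are assumed to match what geocif.py passes via dict_kwargs:

    import geopandas as gpd
    import pandas as pd

    from geocif.ml import spatial_autocorrelation as sa

    # Hypothetical inputs: per-region model results and matching admin boundaries
    df_results = pd.read_csv("results.csv")          # needs the columns listed above
    dg_country = gpd.read_file("admin_regions.shp")  # needs Country Region + geometry

    sa.compute_spatial_autocorrelation(
        df_results,
        dg_country=dg_country,   # keyword confirmed by the geocif.py hunks
        country="malawi",        # assumed keyword names, used for the output plot
        crop="maize",
        dir_output="analysis",
    )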
File: geocif.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.1.30
+Version: 0.1.31
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
File: setup.py
@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.1.30",
+    version="0.1.31",
     zip_safe=False,
 )