geocif 0.2.32__tar.gz → 0.2.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {geocif-0.2.32/geocif.egg-info → geocif-0.2.34}/PKG-INFO +1 -1
  2. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/indices.py +2 -2
  3. {geocif-0.2.32 → geocif-0.2.34}/geocif/geocif.py +28 -11
  4. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/correlations.py +7 -10
  5. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/embedding.py +1 -4
  6. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/feature_engineering.py +17 -5
  7. geocif-0.2.34/geocif/playground/wolayita.py +103 -0
  8. geocif-0.2.34/geocif/playground/wolayita_v2.py +80 -0
  9. geocif-0.2.34/geocif/playground/wolayita_v3.py +219 -0
  10. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/plot.py +10 -17
  11. geocif-0.2.34/geocif/viz/viz_ml.py +95 -0
  12. {geocif-0.2.32 → geocif-0.2.34/geocif.egg-info}/PKG-INFO +1 -1
  13. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/SOURCES.txt +4 -0
  14. {geocif-0.2.32 → geocif-0.2.34}/setup.py +1 -1
  15. {geocif-0.2.32 → geocif-0.2.34}/LICENSE +0 -0
  16. {geocif-0.2.32 → geocif-0.2.34}/MANIFEST.in +0 -0
  17. {geocif-0.2.32 → geocif-0.2.34}/README.md +0 -0
  18. {geocif-0.2.32 → geocif-0.2.34}/geocif/__init__.py +0 -0
  19. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/__init__.py +0 -0
  20. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/geoagmet.py +0 -0
  21. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/plot.py +0 -0
  22. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/utils.py +0 -0
  23. {geocif-0.2.32 → geocif-0.2.34}/geocif/analysis.py +0 -0
  24. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/__init__.py +0 -0
  25. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/constants.py +0 -0
  26. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/features.py +0 -0
  27. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/geo.py +0 -0
  28. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/geocif.py +0 -0
  29. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/metadata.py +0 -0
  30. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/models.py +0 -0
  31. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/__init__.py +0 -0
  32. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/definitions.py +0 -0
  33. {geocif-0.2.32 → geocif-0.2.34}/geocif/experiments.py +0 -0
  34. {geocif-0.2.32 → geocif-0.2.34}/geocif/geocif_runner.py +0 -0
  35. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner.py +0 -0
  36. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_angola.py +0 -0
  37. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_madagascar.py +0 -0
  38. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_malawi.py +0 -0
  39. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_mozambique.py +0 -0
  40. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_south_africa.py +0 -0
  41. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_zambia.py +0 -0
  42. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_zimbabwe.py +0 -0
  43. {geocif-0.2.32 → geocif-0.2.34}/geocif/logger.py +0 -0
  44. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/__init__.py +0 -0
  45. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/feature_selection.py +1 -1
  46. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/outliers.py +0 -0
  47. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/outlook.py +0 -0
  48. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/output.py +0 -0
  49. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/spatial_autocorrelation.py +0 -0
  50. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/stages.py +0 -0
  51. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/stats.py +0 -0
  52. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/trainers.py +0 -0
  53. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/trend.py +0 -0
  54. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/xai.py +0 -0
  55. {geocif-0.2.32 → geocif-0.2.34}/geocif/mm.py +0 -0
  56. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/__init__.py +0 -0
  57. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/aa.py +0 -0
  58. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/area.py +0 -0
  59. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/automl.py +0 -0
  60. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/download_esi.py +0 -0
  61. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/enso.py +0 -0
  62. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/eval.py +0 -0
  63. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/gamtest.py +0 -0
  64. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/gee_access.py +0 -0
  65. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/misc.py +0 -0
  66. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/play_xagg.py +0 -0
  67. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/reg.py +0 -0
  68. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/sustain.py +0 -0
  69. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/test_catboost.py +0 -0
  70. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp.py +0 -0
  71. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp2.py +0 -0
  72. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp3.py +0 -0
  73. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp4.py +0 -0
  74. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp5.py +0 -0
  75. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/wolayita_maize_mask.py +0 -0
  76. {geocif-0.2.32 → geocif-0.2.34}/geocif/risk/__init__.py +0 -0
  77. {geocif-0.2.32 → geocif-0.2.34}/geocif/risk/impact_assessment.py +0 -0
  78. {geocif-0.2.32 → geocif-0.2.34}/geocif/utils.py +0 -0
  79. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/__init__.py +0 -0
  80. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/gt.py +0 -0
  81. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/tmp.py +0 -0
  82. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/dependency_links.txt +0 -0
  83. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/not-zip-safe +0 -0
  84. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/top_level.txt +0 -0
  85. {geocif-0.2.32 → geocif-0.2.34}/requirements.txt +0 -0
  86. {geocif-0.2.32 → geocif-0.2.34}/setup.cfg +0 -0
  87. {geocif-0.2.32 → geocif-0.2.34}/tests/test_geocif.py +0 -0
--- geocif-0.2.32/geocif.egg-info/PKG-INFO
+++ geocif-0.2.34/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.2.32
+Version: 0.2.34
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
--- geocif-0.2.32/geocif/cei/indices.py
+++ geocif-0.2.34/geocif/cei/indices.py
@@ -94,7 +94,7 @@ def standardize_dataframe(df: pd.DataFrame, vi_var: str) -> pd.DataFrame:
     if df[vi_var].max() > 1:
        df[vi_var] = (df[vi_var] - 50) / 200
 
-    # Exclude seasons before 2001 if that's your logic
+    # HACK Exclude seasons before 2001
     df = df[df["Season"] >= 2001]
 
     return df
@@ -507,7 +507,7 @@ class CEIs:
         if not self.redo:
             # If harvest_year is older than last year and file exists, skip
             if (self.harvest_year < (current_year - 1)) and cei_file.is_file():
-                logger.info("CEI file exists and year is old. Skipping: %s", cei_file)
+                logger.info(f"CEI file exists, skipping: {cei_file}")
                 return None
 
         return intermediate_file
--- geocif-0.2.32/geocif/geocif.py
+++ geocif-0.2.34/geocif/geocif.py
@@ -804,14 +804,6 @@ class Geocif:
             mask_train = self.df_train["Region_ID"] == region
             mask_test = self.df_test["Region_ID"] == region
 
-            if self.cluster_strategy == "individual":
-                region_name = self.df_train["Region"].unique()[idx]
-                pbar.set_description(f"Fit/Predict for {region_name}")
-                pbar.update()
-            elif self.cluster_strategy in ["auto_detect", "single"]:
-                pbar.set_description(f"Fit/Predict for group {idx + 1}")
-                pbar.update()
-
             common_columns = (
                 [self.target, self.target_class]
                 + self.statistics_columns
@@ -844,6 +836,14 @@
             self.y_train = df_region_train[self.target_column]
 
             self.apply_feature_selector(region, dir_output)
+
+            if self.cluster_strategy == "individual":
+                region_name = self.df_train["Region"].unique()[idx]
+                pbar.set_description(f"Fit/Predict for {region_name}")
+                pbar.update()
+            elif self.cluster_strategy in ["auto_detect", "single"]:
+                pbar.set_description(f"Fit/Predict for group {idx + 1}")
+                pbar.update()
             self.train_model(df_region_train, dir_output, scaler)
 
             """ Predict """
@@ -1043,17 +1043,27 @@ class Geocif:
 
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
-                df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
+                df,
+                self.all_seasons_with_yield,
+                self.number_median_years,
+                "Area (ha)"
             )
 
         if self.lag_yield_as_feature:
             df = fe.compute_lag_yield(
-                df, self.all_seasons_with_yield, self.number_lag_years, self.target
+                df,
+                self.all_seasons_with_yield,
+                self.forecast_season,
+                self.number_lag_years,
+                self.target
             )
 
         if self.analogous_year_yield_as_feature:
             df = fe.compute_analogous_yield(
-                df, self.all_seasons_with_yield, self.number_median_years, self.target
+                df,
+                self.all_seasons_with_yield,
+                self.number_median_years,
+                self.target
             )
 
         # Create Region_ID column based on Region column category code
@@ -1361,6 +1371,13 @@ class Geocif:
             self.df_inputs[self.target].notna()
         ]["Harvest Year"].unique()
 
+        # Exclude the current forecast season from all_seasons_with_yield
+        self.all_seasons_with_yield = [
+            season
+            for season in self.all_seasons_with_yield
+            if season != self.forecast_season
+        ]
+
        if self.method.endswith("_r"):
            if self.forecast_season == self.today_year:
                mask = self.df_inputs["Harvest Year"] == self.forecast_season
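Together with the compute_lag_yield and compute_closest_years changes further down, the new block above is a leakage guard: the season being forecast no longer contributes its own yield to lag or analogous-year features. A minimal sketch with made-up years (variable names mirror the diff; this is not the packaged code):

import numpy as np

# The season being forecast is removed both from the global pool of seasons
# with yield and from each region's unique years.
forecast_season = 2022
all_seasons_with_yield = [2018, 2019, 2020, 2021, 2022]

# geocif.py: drop the forecast season up front ...
all_seasons_with_yield = [s for s in all_seasons_with_yield if s != forecast_season]

# ... and pass forecast_season into compute_lag_yield, which applies the same
# filter per region before looking up lagged yields.
unique_years = np.array([2019, 2020, 2021, 2022])
unique_years = unique_years[unique_years != forecast_season]

print(all_seasons_with_yield)  # [2018, 2019, 2020, 2021]
print(unique_years)            # [2019 2020 2021]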
--- geocif-0.2.32/geocif/ml/correlations.py
+++ geocif-0.2.34/geocif/ml/correlations.py
@@ -295,16 +295,13 @@ def all_correlated_feature_by_time(df, **kwargs):
             df_tmp2.loc[idx, "Type"] = combined_dict[row[0]][0]
 
         # Compute median of each CEI and sort the dataframe based on the absolute value of the median
-        try:
-            dict_best_cei[region_id] = (
-                df_tmp2.groupby("Type")
-                .max()
-                .reset_index()
-                .sort_values("Value", ascending=False)["Metric"]
-                .values
-            )
-        except:
-            breakpoint()
+        dict_best_cei[region_id] = (
+            df_tmp2.groupby("Type")
+            .max()
+            .reset_index()
+            .sort_values("Value", ascending=False)["Metric"]
+            .values
+        )
 
         kwargs["region_id"] = region_id
         _region_names = ", ".join([str(x) for x in group['Region'].unique()])
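With the try/except gone, the chained expression is the whole story: a column-wise max per CEI Type, then a ranking of Metrics by Value. A toy illustration with invented CEI names and values (note that groupby(...).max() takes each column's max independently, so the retained Metric is the lexicographic max within its Type, not necessarily the one that produced the max Value):

import pandas as pd

# Toy version of the expression that replaced the try/except.
df_tmp2 = pd.DataFrame({
    "Type": ["heat", "heat", "rain"],
    "Metric": ["TXx", "TX90p", "RX5day"],
    "Value": [0.62, 0.48, 0.55],
})
best = (
    df_tmp2.groupby("Type")
    .max()
    .reset_index()
    .sort_values("Value", ascending=False)["Metric"]
    .values
)
print(best)  # ['TXx' 'RX5day'] -- one ranked Metric per Type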
--- geocif-0.2.32/geocif/ml/embedding.py
+++ geocif-0.2.34/geocif/ml/embedding.py
@@ -32,10 +32,7 @@ def _compute_correlations(X, y):
         f_series = X[feature]
 
         # Ignore NaN values in either y or f_series
-        try:
-            mask = ~(np.isnan(y) | np.isnan(f_series))
-        except:
-            breakpoint()
+        mask = ~(np.isnan(y) | np.isnan(f_series))
         y_filtered = y[mask]
         f_series_filtered = f_series[mask]
 
--- geocif-0.2.32/geocif/ml/feature_engineering.py
+++ geocif-0.2.34/geocif/ml/feature_engineering.py
@@ -39,23 +39,32 @@ def compute_last_year_yield(df, target_col="Yield (tn per ha)"):
 
     return df
 
-def compute_closest_years(all_years, harvest_year, number_lag_years):
+def compute_closest_years(all_years, harvest_year, number_lag_years, only_historic=False):
     """
     Finds the historical years closest to a given harvest year,
-    excluding any future year (harvest_year itself and beyond).
+    excluding any future year (harvest_year itself and beyond) based on the only_historic flag.
 
     Args:
         all_years (array-like): List or array of all years to consider.
         harvest_year (int): The year from which to compute distance.
         number_lag_years (int): Number of closest years to return.
+        only_historic (bool): If True, only consider years before the harvest year.
 
     Returns:
         list: The historical years closest to the given harvest year.
         Returns an empty list if no historical years exist.
     """
     # Exclude the harvest year before computation to simplify logic
-    filtered_years = [year for year in all_years if year != harvest_year]
+    if only_historic:
+        filtered_years = [year for year in all_years if year < harvest_year]
+    else:
+        filtered_years = [year for year in all_years if year != harvest_year]
+
+    # If no historical years exist, return an empty list
+    if not filtered_years:
+        return []
 
+    # Sort the years based on their absolute difference from the harvest year
     closest_years = np.array(filtered_years)[
         np.argsort(np.abs(np.array(filtered_years) - harvest_year))[:number_lag_years]
     ]
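A standalone sketch of the new only_historic switch, reimplemented here with invented years rather than imported from the package: without the flag, the nearest neighbors can include seasons after the harvest year; with it, only strictly earlier seasons qualify.

import numpy as np

# Minimal reimplementation of the selection logic shown in the hunk above.
def closest_years(all_years, harvest_year, number_lag_years, only_historic=False):
    if only_historic:
        filtered = [y for y in all_years if y < harvest_year]   # strictly earlier seasons
    else:
        filtered = [y for y in all_years if y != harvest_year]  # earlier and later seasons
    if not filtered:
        return []
    order = np.argsort(np.abs(np.array(filtered) - harvest_year))
    return [int(v) for v in np.array(filtered)[order[:number_lag_years]]]

years = [2016, 2017, 2018, 2019, 2020]
print(closest_years(years, 2019, 2))                      # [2018, 2020] -- 2020 leaks a later season
print(closest_years(years, 2019, 2, only_historic=True))  # [2018, 2017] -- historical only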
@@ -150,7 +159,7 @@ def compute_user_median_statistics(df, user_years, target_col="Yield (tn per ha)"):
 
 
 def compute_lag_yield(
-    df, all_seasons_with_yield, number_lag_years, target_col="Yield (tn per ha)"
+    df, all_seasons_with_yield, forecast_season, number_lag_years, target_col="Yield (tn per ha)"
 ):
     # For the number of years specified in self.number_lag_years, add the yield of that number of years
     # ago to the dataframe
@@ -167,9 +176,12 @@ def compute_lag_yield(
         if group[target_col].isnull().all():
             continue
 
+        # Exclude the forecast season from the unique years
+        unique_years = unique_years[unique_years != forecast_season]
+
         for harvest_year in unique_years:
             closest_years = compute_closest_years(
-                all_seasons_with_yield, harvest_year, number_lag_years
+                all_seasons_with_yield, harvest_year, number_lag_years, only_historic=True
             )
 
             # For each year in the closest years, add the yield to the dataframe as a new column
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita.py
@@ -0,0 +1,103 @@
+import ee
+import geemap
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime
+
+# 1. Initialize Earth Engine
+ee.Initialize(project="ee-rit")
+
+# 2. Load your study region
+region = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+
+# 3. Load your cropland-mask raster (1=crop, other values = non-crop)
+crop_mask = ee.Image('projects/ee-rit/assets/shabari_maize').rename('cropMask')
+
+# 4. Define & merge the two HLS collections, selecting only B3,B4,B5 and filtering clouds
+LAND_HLS = "NASA/HLS/HLSL30/v002"
+SENT_HLS = "NASA/HLS/HLSS30/v002"
+CLOUD_PROP = 'CLOUD_COVERAGE'
+CLOUD_MAX = 30
+BANDS = ['B3', 'B4', 'B5']  # green, red, nir
+
+hls = (
+    ee.ImageCollection(LAND_HLS)
+    .filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX))
+    .select(BANDS)
+).merge(
+    ee.ImageCollection(SENT_HLS)
+    .filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX))
+    .select(BANDS)
+)
+
+# 5. Compute annual median GCVI & NDVI over Apr–Aug, masking only pixels == 1
+def annual_stats(year):
+    year = ee.Number(year)
+    start = ee.Date.fromYMD(year, 4, 1)
+    end = ee.Date.fromYMD(year, 8, 31)
+
+    # 5.1 median composite
+    img = hls.filterDate(start, end).median()
+
+    # 5.2 compute indices
+    gcvi = img.expression(
+        '(nir/green) - 1',
+        {'nir': img.select('B5'),
+         'green': img.select('B3')}
+    ).rename('GCVI')
+    ndvi = img.normalizedDifference(['B5', 'B4']).rename('NDVI')
+    indices = gcvi.addBands(ndvi)
+
+    # 5.3 build binary mask (pixel==1)
+    mask_binary = crop_mask.eq(1)
+
+    # 5.4 apply mask & reduce to median
+    masked = indices.updateMask(mask_binary)
+    stats = masked.reduceRegion(
+        reducer=ee.Reducer.median(),
+        geometry=region.geometry(),
+        scale=30,
+        maxPixels=1e13
+    )
+
+    return ee.Feature(None, {
+        'year': year,
+        'median_GCVI': stats.get('GCVI'),
+        'median_NDVI': stats.get('NDVI')
+    })
+
+# 6. Build the collection & pull into pandas
+years = ee.List.sequence(2013, datetime.datetime.now().year)
+fc = ee.FeatureCollection(years.map(annual_stats))
+
+data = fc.getInfo()['features']
+df = pd.DataFrame([f['properties'] for f in data])
+df['year'] = df['year'].astype(int)
+
+# 7. Coerce to numeric and drop years without data
+df['median_GCVI'] = pd.to_numeric(df['median_GCVI'], errors='coerce')
+df['median_NDVI'] = pd.to_numeric(df['median_NDVI'], errors='coerce')
+df_valid = df.dropna(subset=['median_GCVI', 'median_NDVI']).sort_values('year')
+
+# 8. Export to Drive
+ee.batch.Export.table.toDrive(
+    collection=fc,
+    description='HLS_CropMask_Medians_AprAug',
+    folder='EarthEngineOutputs',
+    fileNamePrefix='hls_crop_medians_apr_aug',
+    fileFormat='CSV'
+).start()
+
+# 9. Plot bar charts
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
+ax1.bar(df_valid['year'], df_valid['median_GCVI'])
+ax1.set_title('Median GCVI by Year (Apr–Aug)')
+ax1.set_ylabel('GCVI')
+
+ax2.bar(df_valid['year'], df_valid['median_NDVI'])
+ax2.set_title('Median NDVI by Year (Apr–Aug)')
+ax2.set_ylabel('NDVI')
+ax2.set_xlabel('Year')
+
+plt.tight_layout()
+plt.show()
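For reference, both indices in this new script reduce to simple band arithmetic on the HLS composite. A quick check with invented reflectance values (band roles as in the script: B5 = NIR, B4 = red, B3 = green):

# Sanity check of the two index formulas; the reflectance values are made up.
nir, red, green = 0.40, 0.08, 0.10
gcvi = nir / green - 1            # matches img.expression('(nir/green) - 1', ...)
ndvi = (nir - red) / (nir + red)  # matches normalizedDifference(['B5', 'B4'])
print(round(gcvi, 2), round(ndvi, 2))  # 3.0 0.67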
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita_v2.py
@@ -0,0 +1,80 @@
+import ee
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime
+import seaborn as sns
+
+# Initialize EE
+ee.Initialize(project='ee-rit')
+
+# 1. Load your region polygons (ADM3 level)
+regions = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+
+# 2. Load your crop mask and HLS collections
+crop_mask = ee.Image('projects/ee-rit/assets/shabari_maize').eq(1)
+LAND_HLS = "NASA/HLS/HLSL30/v002"
+SENT_HLS = "NASA/HLS/HLSS30/v002"
+CLOUD_PROP = 'CLOUD_COVERAGE'
+CLOUD_MAX = 30
+BANDS = ['B3', 'B4', 'B5']  # green, red, nir
+
+hls = (
+    ee.ImageCollection(LAND_HLS).filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX)).select(BANDS)
+).merge(
+    ee.ImageCollection(SENT_HLS).filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX)).select(BANDS)
+)
+
+# 3. Function: for a given region feature, return a FC of (year, ADM3_EN, max_NDVI)
+years = ee.List.sequence(2013, datetime.datetime.now().year)
+def stats_for_region(feat):
+    def stats_for_year(y):
+        y = ee.Number(y)
+        # composite and NDVI
+        img = hls.filterDate(ee.Date.fromYMD(y, 4, 1), ee.Date.fromYMD(y, 8, 31)).median()
+        ndvi = img.normalizedDifference(['B5', 'B4']).rename('NDVI')
+        # mask to cropland
+        ndvi = ndvi.updateMask(crop_mask)
+        # max over this feature
+        max_ndvi = ndvi.reduceRegion(
+            ee.Reducer.max(),
+            geometry=feat.geometry(),
+            scale=30,
+            maxPixels=1e13
+        ).get('NDVI')
+        return ee.Feature(None, {
+            'ADM3_EN': feat.get('ADM3_EN'),
+            'year': y,
+            'max_NDVI': max_ndvi
+        })
+    return ee.FeatureCollection(years.map(stats_for_year))
+
+# 4. Build the full collection and fetch
+fc = regions.map(stats_for_region).flatten()
+data = fc.getInfo()['features']
+df = pd.DataFrame([f['properties'] for f in data])
+df['year'] = df['year'].astype(int)
+df['max_NDVI'] = pd.to_numeric(df['max_NDVI'], errors='coerce')
+
+# 5. Pivot to matrix: rows=ADM3_EN, cols=year
+mat = df.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
+
+# 6. Plot heatmap
+
+plt.figure(figsize=(12, 10))
+
+# Draw heatmap with annotations, two decimal places
+sns.heatmap(
+    mat,
+    annot=True,
+    fmt=".2f",
+    cbar_kws={'label': 'Max NDVI'},
+    linewidths=0.5,   # optional: grid lines between cells
+    linecolor='gray'  # optional: grid line color
+)
+
+plt.title('Apr–Aug Max NDVI by ADM3_EN and Year')
+plt.xlabel('')
+plt.ylabel('Woreda')
+plt.xticks(rotation=45, ha='right')
+plt.tight_layout()
+plt.show()
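(Note: the released file pivots on a 'mean_NDVI' column and labels the heatmap "Mean NDVI", but the only column the script actually builds is 'max_NDVI'; the pivot, colorbar label, and title above are corrected accordingly.) The pivot that feeds the heatmap is worth seeing on toy data: long-format (ADM3_EN, year, max_NDVI) records become a woreda-by-year matrix. The woreda names below are real ADM3 units from the area; the NDVI values are invented:

import pandas as pd

# Toy version of the pivot feeding sns.heatmap.
df = pd.DataFrame({
    'ADM3_EN': ['Humbo', 'Humbo', 'Ofa', 'Ofa'],
    'year': [2020, 2021, 2020, 2021],
    'max_NDVI': [0.61, 0.58, 0.72, 0.69],
})
mat = df.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
print(mat)
# year      2020  2021
# ADM3_EN
# Humbo     0.61  0.58
# Ofa       0.72  0.69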
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita_v3.py
@@ -0,0 +1,219 @@
+import ee
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import datetime
+from tqdm import tqdm
+import math
+
+# -----------------------------------------------------------------------------
+# SETTINGS: choose one
+# -----------------------------------------------------------------------------
+MODE = "sample"  # "sample" = raw pixel draws; "percentile" = server-side max-NDVI heatmap
+
+YEARS = list(range(2013, datetime.datetime.now().year + 1))
+MONTHS = [4, 5, 6, 7, 8]  # Apr–Aug (used only in sample mode)
+SAMPLES_PER_COMBO = 500  # for MODE="sample"
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# 1. Initialize & Constants
+# -----------------------------------------------------------------------------
+ee.Initialize(project='ee-rit')
+
+REGIONS = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+CROP_MASK = ee.Image('projects/ee-rit/assets/shabari_maize').eq(1)
+HLS = (
+    ee.ImageCollection("NASA/HLS/HLSL30/v002")
+    .filter(ee.Filter.lt('CLOUD_COVERAGE', 100))
+    .select(['B3', 'B4', 'B5'])
+).merge(
+    ee.ImageCollection("NASA/HLS/HLSS30/v002")
+    .filter(ee.Filter.lt('CLOUD_COVERAGE', 100))
+    .select(['B3', 'B4', 'B5'])
+)
+
+# -----------------------------------------------------------------------------
+# 2. Inline yield values (tn per ha) for ADM3_EN from 2004 to 2021
+# -----------------------------------------------------------------------------
+yield_dict = {
+    'Bolossa Bonibe': {2008: 12.10, 2009: 15.45, 2010: 10.12, 2011: 15.68, 2012: 10.94,
+                       2013: 24.82, 2014: 20.33, 2015: 12.20, 2017: 11.47, 2021: 12.53},
+    'Bolossa Sore': {2004: 17.54, 2005: 18.24, 2006: 17.88, 2007: 20.97, 2008: 12.10,
+                     2009: 18.68, 2010: 18.55, 2012: 16.83, 2013: 18.95, 2014: 20.75,
+                     2015: 19.92, 2016: 12.92, 2017: 11.24, 2018: 19.96, 2019: 20.21,
+                     2020: 28.06, 2021: 27.20},
+    'Damot Gale': {2004: 13.81, 2005: 15.88, 2006: 15.57, 2007: 24.63, 2008: 10.07,
+                   2009: 17.67, 2010: 14.72, 2011: 6.03, 2012: 23.06, 2013: 23.52,
+                   2014: 25.07, 2015: 22.12, 2016: 27.19, 2017: 20.45, 2018: 19.96,
+                   2019: 20.20, 2020: 28.06, 2021: 29.77},
+    'Damot Pulasa': {2008: 11.55, 2009: 14.44, 2010: 11.92, 2011: 12.21, 2012: 12.91,
+                     2013: 26.50, 2014: 24.08, 2015: 12.99, 2016: 7.73, 2017: 20.45,
+                     2018: 19.96, 2019: 39.52, 2020: 28.06, 2021: 9.32},
+    'Damot Sore': {2008: 8.51, 2009: 12.36, 2010: 12.23, 2011: 21.52, 2012: 18.03,
+                   2013: 16.06, 2014: 17.79, 2015: 26.63, 2016: 17.36, 2017: 20.45,
+                   2018: 8.49, 2019: 21.28, 2021: 14.99},
+    'Damot Woyide': {2004: 18.89, 2005: 12.35, 2006: 12.11, 2007: 14.42, 2008: 16.56,
+                     2009: 19.32, 2010: 8.98, 2012: 22.04, 2013: 23.37, 2014: 21.44,
+                     2015: 23.79, 2016: 22.25, 2017: 19.60, 2018: 19.87, 2019: 15.12,
+                     2020: 28.06, 2021: 14.06},
+    'Deguna Fanigo': {2008: 10.60, 2009: 16.29, 2010: 21.87, 2011: 18.46, 2012: 17.38,
+                      2013: 27.01, 2014: 16.38, 2015: 17.25, 2016: 22.06, 2017: 27.30,
+                      2018: 28.83, 2019: 14.99, 2020: 34.81, 2021: 30.95},
+    'Humbo': {2004: 23.50, 2005: 16.38, 2006: 16.06, 2007: 13.51, 2008: 10.33,
+              2009: 17.99, 2010: 14.83, 2011: 7.56, 2012: 27.13, 2013: 16.62,
+              2014: 13.70, 2015: 14.28, 2016: 19.61, 2017: 17.82, 2018: 27.96,
+              2019: 16.03, 2020: 12.45, 2021: 23.16},
+    'Kindo Didaye': {2008: 11.13, 2009: 18.47, 2010: 10.92, 2011: 12.21, 2012: 27.17,
+                     2015: 20.22, 2016: 16.98, 2018: 17.68},
+    'Kindo Koyisha': {2004: 14.51, 2005: 13.04, 2006: 12.78, 2007: 23.89, 2008: 12.10,
+                      2009: 12.77, 2010: 19.19, 2012: 15.28, 2013: 19.41, 2014: 20.47,
+                      2015: 20.61, 2016: 13.79, 2017: 20.26, 2018: 22.40, 2019: 22.49,
+                      2020: 22.96, 2021: 24.39},
+    'Ofa': {2004: 27.61, 2005: 10.39, 2006: 10.19, 2007: 19.48, 2009: 5.62,
+            2010: 17.77, 2011: 16.99, 2012: 19.59, 2013: 17.59, 2014: 9.26,
+            2015: 25.25, 2016: 23.82, 2017: 14.18, 2018: 19.96, 2019: 20.32,
+            2021: 20.47},
+    'Sodo Zuriya': {2004: 6.39, 2005: 7.75, 2006: 7.59, 2007: 11.11, 2008: 18.66,
+                    2009: 17.86, 2010: 14.61, 2011: 20.24, 2012: 15.88, 2013: 22.42,
+                    2014: 22.62, 2015: 33.23, 2016: 21.68, 2017: 20.63, 2018: 11.16,
+                    2019: 19.82, 2020: 16.35, 2021: 19.69}
+}
+df_yield = (
+    pd.DataFrame.from_dict(yield_dict, orient='index')
+    .reset_index().rename(columns={'index': 'ADM3_EN'})
+    .melt(id_vars='ADM3_EN', var_name='year', value_name='yield')
+    .dropna(subset=['yield'])
+)
+df_yield['year'] = df_yield['year'].astype(int)
+
+# -----------------------------------------------------------------------------
+# 2A. MODE="sample": raw-pixel sampling
+# -----------------------------------------------------------------------------
+def fetch_ndvi_samples(regions_fc, hls_ic, crop_mask, years, months, n_samples):
+    rows = []
+    feats = regions_fc.getInfo().get('features', [])
+    for feat in feats:
+        name = feat['properties'].get('ADM3_EN')
+        geom = ee.Feature(feat).geometry()
+        for y in tqdm(years, desc="years", leave=False):
+            for m in tqdm(months, desc="months", leave=False):
+                coll = hls_ic.filterDate(
+                    ee.Date.fromYMD(y, m, 1),
+                    ee.Date.fromYMD(y, m, 1).advance(1, 'month')
+                )
+                if coll.size().getInfo() == 0:
+                    continue
+                ndvi = (
+                    coll.median()
+                    .normalizedDifference(['B5', 'B4'])
+                    .rename('NDVI')
+                    .updateMask(crop_mask)
+                )
+                samples = ndvi.sample(
+                    region=geom,
+                    scale=30,
+                    numPixels=n_samples,
+                    seed=42
+                ).getInfo().get('features', [])
+                for s in samples:
+                    rows.append({
+                        'ADM3_EN': name,
+                        'year': y,
+                        'month': m,
+                        'ndvi': s['properties'].get('NDVI')
+                    })
+    return pd.DataFrame(rows, columns=['ADM3_EN', 'year', 'month', 'ndvi'])
+
+# -----------------------------------------------------------------------------
+# 2B. MODE="percentile": server-side max-NDVI heatmap
+# -----------------------------------------------------------------------------
+def fetch_ndvi_max(regions_fc, hls_ic, crop_mask, years):
+    rows = []
+    for y in tqdm(years, desc="years"):
+        img = hls_ic.filterDate(f'{y}-04-01', f'{y}-08-31').median()
+        ndvi = (
+            img.normalizedDifference(['B5', 'B4'])
+            .rename('NDVI')
+            .updateMask(crop_mask)
+        )
+        stats_fc = ndvi.reduceRegions(
+            collection=regions_fc,
+            reducer=ee.Reducer.max().setOutputs(['max_NDVI']),
+            scale=30,
+            tileScale=8
+        ).map(lambda f: f.set('year', y))
+        for feat in stats_fc.getInfo().get('features', []):
+            props = feat['properties']
+            val = props.get('max_NDVI')
+            max_ndvi = np.nan if val is None else float(val)
+            rows.append({
+                'ADM3_EN': props.get('ADM3_EN'),
+                'year': int(props.get('year')),
+                'max_NDVI': max_ndvi
+            })
+    return pd.DataFrame(rows, columns=['ADM3_EN', 'year', 'max_NDVI'])
+
+# -----------------------------------------------------------------------------
+# 3. Run selected mode and plot
+# -----------------------------------------------------------------------------
+if MODE == "sample":
+    df = fetch_ndvi_samples(REGIONS, HLS, CROP_MASK, YEARS, MONTHS, SAMPLES_PER_COMBO)
+    if df.empty:
+        raise ValueError("No samples -- check your mask/region.")
+    sns.catplot(
+        x='month', y='ndvi', col='year',
+        data=df.dropna(subset=['ndvi']), kind='box',
+        col_wrap=4, sharey=True,
+        height=3.5, aspect=1
+    ).fig.suptitle("Monthly NDVI Distributions by Year and Woreda", y=1.02)
+    plt.show()
+
+elif MODE == "percentile":
+    # 3.1 Fetch max-NDVI
+    dfm = fetch_ndvi_max(REGIONS, HLS, CROP_MASK, YEARS)
+    if dfm.empty:
+        raise ValueError("No max-NDVI values -- check your mask/region.")
+
+    # 3.2 Heatmap: Max NDVI
+    mat_ndvi = dfm.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
+    plt.figure(figsize=(12, 10))
+    sns.heatmap(mat_ndvi, annot=True, fmt=".2f", linewidths=0.5,
+                linecolor='gray', cbar_kws={'label': 'Max NDVI'})
+    plt.title('Apr–Aug Max NDVI by Woreda and Year')
+    plt.xlabel('Year'); plt.ylabel('Woreda'); plt.xticks(rotation=45)
+    plt.tight_layout(); plt.show()
+
+    # 3.3 Heatmap: Yield
+    mat_yield = df_yield.pivot(index='ADM3_EN', columns='year', values='yield')
+    plt.figure(figsize=(12, 10))
+    sns.heatmap(mat_yield, annot=True, fmt=".2f", linewidths=0.5,
+                linecolor='gray', cbar_kws={'label': 'Yield (tn/ha)'})
+    plt.title('Crop Yield by Woreda and Year')
+    plt.xlabel('Year'); plt.ylabel('Woreda'); plt.xticks(rotation=45)
+    plt.tight_layout(); plt.show()
+
+    # 3.4 Scatter: one subplot per region (max 5 per row)
+    df_merge = pd.merge(df_yield, dfm, on=['ADM3_EN', 'year'], how='outer')
+    regions = sorted(df_merge['ADM3_EN'].unique())
+    n_regions = len(regions)
+    ncols = 5
+    nrows = math.ceil(n_regions / ncols)
+    fig, axes = plt.subplots(nrows, ncols,
+                             figsize=(5*ncols, 4*nrows),
+                             sharex=True, sharey=True)
+    axes = axes.flatten()
+    for ax, region in zip(axes, regions):
+        sub = df_merge[df_merge['ADM3_EN'] == region]
+        sns.scatterplot(data=sub, x='yield', y='max_NDVI', ax=ax)
+        ax.set_title(region)
+        ax.set_xlabel('Yield (tn/ha)')
+        ax.set_ylabel('Max NDVI')
+    for ax in axes[len(regions):]:
+        ax.set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+else:
+    raise ValueError(f"Unknown MODE={MODE!r}")
--- geocif-0.2.32/geocif/viz/plot.py
+++ geocif-0.2.34/geocif/viz/plot.py
@@ -395,23 +395,16 @@ def plot_df_shpfile(
             )
             _name_country.append(cntr.replace(" ", "_").lower())
 
-            # Hack
-            if _name_country[0] == "russian_federation":
-                extent = [20, 80, 40, 80]
-            else:
-                extent = rgeo.get_country_lat_lon_extent(
-                    _name_country, buffer=1.0
-                )  # left, right, bottom, top
-
-            # Hack: Add space to the top for adding title
-            extent[3] = extent[3] + 2
-            # Add some space to the bottom for adding legend and colorbar
-            extent[2] = extent[2] - 3
-
-            try:
-                ax.set_extent(extent)
-            except:
-                breakpoint()
+            extent = rgeo.get_country_lat_lon_extent(
+                _name_country, buffer=1.0
+            )  # left, right, bottom, top
+
+            # Hack: Add space to the top for adding title
+            extent[3] = extent[3] + 2
+            # Add some space to the bottom for adding legend and colorbar
+            extent[2] = extent[2] - 3
+
+            ax.set_extent(extent)
         elif name_country == "world":
             ax.add_feature(
                 cartopy.feature.LAND.with_scale("50m"), color="white"
--- /dev/null
+++ geocif-0.2.34/geocif/viz/viz_ml.py
@@ -0,0 +1,95 @@
+import sqlite3
+import pandas as pd
+import panel as pn
+import matplotlib.pyplot as plt
+
+# Enable Panel's Matplotlib support
+pn.extension()
+
+# Connect to the SQLite database
+conn = sqlite3.connect(r'D:\Users\ritvik\projects\GEOGLAM\Output\ml\db\presentation_v2.db')
+
+# Find every table except config*
+all_tables = pd.read_sql_query(
+    "SELECT name FROM sqlite_master WHERE type='table';",
+    conn
+)['name'].tolist()
+data_tables = [t for t in all_tables if not t.lower().startswith('config')]
+
+# Columns we need in each table
+required = {
+    'Country',
+    'Crop',
+    'Harvest Year',
+    'Observed Yield (tn per ha)',
+    'Predicted Yield (tn per ha)'
+}
+
+frames = []
+for tbl in data_tables:
+    cols = pd.read_sql_query(f"PRAGMA table_info('{tbl}');", conn)['name'].tolist()
+    if required.issubset(cols):
+        df = pd.read_sql_query(f"""
+            SELECT
+                Country,
+                Crop,
+                [Harvest Year] AS year,
+                [Observed Yield (tn per ha)] AS observed,
+                [Predicted Yield (tn per ha)] AS predicted
+            FROM "{tbl}"
+        """, conn)
+        frames.append(df)
+
+if not frames:
+    raise ValueError("No tables found with the required schema!")
+
+df_all = pd.concat(frames, ignore_index=True)
+print(df_all)
+conn.close()
+
+# 3. Build Panel widgets
+country_select = pn.widgets.Select(
+    name='Country',
+    options=sorted(df_all['Country'].unique())
+)
+crop_select = pn.widgets.Select(name='Crop', options=[])
+year_select = pn.widgets.Select(
+    name='Year',
+    options=sorted(df_all['year'].astype(str).unique())
+)
+
+# When Country changes, update Crop list
+@pn.depends(country_select.param.value, watch=True)
+def update_crops(country):
+    crops = sorted(df_all[df_all['Country'] == country]['Crop'].unique())
+    crop_select.options = crops
+    if crops:
+        crop_select.value = crops[0]
+
+update_crops(country_select.value)
+
+# 4. Scatter plot: Observed vs Predicted
+@pn.depends(
+    country_select.param.value,
+    crop_select.param.value,
+    year_select.param.value
+)
+def scatter_plot(country, crop, year):
+    year = int(year)
+    # Filter to the selected country/crop/year (year column cast to int)
+    df = df_all[
+        (df_all['Country'] == country)
+        & (df_all['Crop'] == crop)
+        & (df_all['year'].astype(int) == year)
+    ]
+    fig, ax = plt.subplots()
+    ax.scatter(df['observed'], df['predicted'])
+    ax.set_xlabel('Observed Yield (tn per ha)')
+    ax.set_ylabel('Predicted Yield (tn per ha)')
+    ax.set_title(f'{crop} in {country}, {year}')
+    return fig
+
+# 5. Assemble & serve
+dashboard = pn.Column(
+    pn.Row(country_select, crop_select, year_select),
+    scatter_plot
+)
+
+dashboard.servable()
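(The released file has only a comment where the pn.extension() call should be; it is restored above so the Matplotlib pane renders.) Since the module ends with .servable(), the usual way to run it is `panel serve viz_ml.py` from a shell. A sketch of previewing it from a Python session instead (assumes the geocif package is importable and the hard-coded SQLite path above resolves on your machine):

from geocif.viz import viz_ml  # importing the module builds `dashboard`

viz_ml.dashboard.show()  # starts a local Bokeh server and opens a browser tab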
--- geocif-0.2.32/PKG-INFO
+++ geocif-0.2.34/geocif.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.2.32
+Version: 0.2.34
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
--- geocif-0.2.32/geocif.egg-info/SOURCES.txt
+++ geocif-0.2.34/geocif.egg-info/SOURCES.txt
@@ -72,11 +72,15 @@ geocif/playground/tmp2.py
 geocif/playground/tmp3.py
 geocif/playground/tmp4.py
 geocif/playground/tmp5.py
+geocif/playground/wolayita.py
 geocif/playground/wolayita_maize_mask.py
+geocif/playground/wolayita_v2.py
+geocif/playground/wolayita_v3.py
 geocif/risk/__init__.py
 geocif/risk/impact_assessment.py
 geocif/viz/__init__.py
 geocif/viz/gt.py
 geocif/viz/plot.py
 geocif/viz/tmp.py
+geocif/viz/viz_ml.py
 tests/test_geocif.py
--- geocif-0.2.32/setup.py
+++ geocif-0.2.34/setup.py
@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.2.32",
+    version="0.2.34",
     zip_safe=False,
 )
--- geocif-0.2.32/geocif/ml/feature_selection.py
+++ geocif-0.2.34/geocif/ml/feature_selection.py
@@ -253,6 +253,7 @@ def select_features(
     elif method == "Leshy":
         import arfs.feature_selection.allrelevant as arfsgroot
         from catboost import CatBoostRegressor
+
         model = CatBoostRegressor(n_estimators=350, verbose=0, use_best_model=False)
         sel = arfsgroot.Leshy(
             model,
@@ -264,7 +265,6 @@ def select_features(
         )
         sel.fit(X_clean, y)
         selected = sel.get_feature_names_out()
-
     elif method == "PowerShap":
         from powershap import PowerShap
         from catboost import CatBoostRegressor