geocif 0.2.32__tar.gz → 0.2.34__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. {geocif-0.2.32/geocif.egg-info → geocif-0.2.34}/PKG-INFO +1 -1
  2. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/indices.py +2 -2
  3. {geocif-0.2.32 → geocif-0.2.34}/geocif/geocif.py +28 -11
  4. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/correlations.py +7 -10
  5. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/embedding.py +1 -4
  6. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/feature_engineering.py +17 -5
  7. geocif-0.2.34/geocif/playground/wolayita.py +103 -0
  8. geocif-0.2.34/geocif/playground/wolayita_v2.py +80 -0
  9. geocif-0.2.34/geocif/playground/wolayita_v3.py +219 -0
  10. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/plot.py +10 -17
  11. geocif-0.2.34/geocif/viz/viz_ml.py +95 -0
  12. {geocif-0.2.32 → geocif-0.2.34/geocif.egg-info}/PKG-INFO +1 -1
  13. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/SOURCES.txt +4 -0
  14. {geocif-0.2.32 → geocif-0.2.34}/setup.py +1 -1
  15. {geocif-0.2.32 → geocif-0.2.34}/LICENSE +0 -0
  16. {geocif-0.2.32 → geocif-0.2.34}/MANIFEST.in +0 -0
  17. {geocif-0.2.32 → geocif-0.2.34}/README.md +0 -0
  18. {geocif-0.2.32 → geocif-0.2.34}/geocif/__init__.py +0 -0
  19. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/__init__.py +0 -0
  20. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/geoagmet.py +0 -0
  21. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/plot.py +0 -0
  22. {geocif-0.2.32 → geocif-0.2.34}/geocif/agmet/utils.py +0 -0
  23. {geocif-0.2.32 → geocif-0.2.34}/geocif/analysis.py +0 -0
  24. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/__init__.py +0 -0
  25. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/constants.py +0 -0
  26. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/features.py +0 -0
  27. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/geo.py +0 -0
  28. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/geocif.py +0 -0
  29. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/metadata.py +0 -0
  30. {geocif-0.2.32 → geocif-0.2.34}/geocif/backup/models.py +0 -0
  31. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/__init__.py +0 -0
  32. {geocif-0.2.32 → geocif-0.2.34}/geocif/cei/definitions.py +0 -0
  33. {geocif-0.2.32 → geocif-0.2.34}/geocif/experiments.py +0 -0
  34. {geocif-0.2.32 → geocif-0.2.34}/geocif/geocif_runner.py +0 -0
  35. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner.py +0 -0
  36. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_angola.py +0 -0
  37. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_madagascar.py +0 -0
  38. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_malawi.py +0 -0
  39. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_mozambique.py +0 -0
  40. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_south_africa.py +0 -0
  41. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_zambia.py +0 -0
  42. {geocif-0.2.32 → geocif-0.2.34}/geocif/indices_runner_zimbabwe.py +0 -0
  43. {geocif-0.2.32 → geocif-0.2.34}/geocif/logger.py +0 -0
  44. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/__init__.py +0 -0
  45. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/feature_selection.py +1 -1
  46. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/outliers.py +0 -0
  47. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/outlook.py +0 -0
  48. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/output.py +0 -0
  49. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/spatial_autocorrelation.py +0 -0
  50. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/stages.py +0 -0
  51. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/stats.py +0 -0
  52. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/trainers.py +0 -0
  53. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/trend.py +0 -0
  54. {geocif-0.2.32 → geocif-0.2.34}/geocif/ml/xai.py +0 -0
  55. {geocif-0.2.32 → geocif-0.2.34}/geocif/mm.py +0 -0
  56. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/__init__.py +0 -0
  57. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/aa.py +0 -0
  58. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/area.py +0 -0
  59. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/automl.py +0 -0
  60. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/download_esi.py +0 -0
  61. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/enso.py +0 -0
  62. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/eval.py +0 -0
  63. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/gamtest.py +0 -0
  64. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/gee_access.py +0 -0
  65. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/misc.py +0 -0
  66. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/play_xagg.py +0 -0
  67. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/reg.py +0 -0
  68. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/sustain.py +0 -0
  69. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/test_catboost.py +0 -0
  70. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp.py +0 -0
  71. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp2.py +0 -0
  72. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp3.py +0 -0
  73. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp4.py +0 -0
  74. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/tmp5.py +0 -0
  75. {geocif-0.2.32 → geocif-0.2.34}/geocif/playground/wolayita_maize_mask.py +0 -0
  76. {geocif-0.2.32 → geocif-0.2.34}/geocif/risk/__init__.py +0 -0
  77. {geocif-0.2.32 → geocif-0.2.34}/geocif/risk/impact_assessment.py +0 -0
  78. {geocif-0.2.32 → geocif-0.2.34}/geocif/utils.py +0 -0
  79. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/__init__.py +0 -0
  80. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/gt.py +0 -0
  81. {geocif-0.2.32 → geocif-0.2.34}/geocif/viz/tmp.py +0 -0
  82. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/dependency_links.txt +0 -0
  83. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/not-zip-safe +0 -0
  84. {geocif-0.2.32 → geocif-0.2.34}/geocif.egg-info/top_level.txt +0 -0
  85. {geocif-0.2.32 → geocif-0.2.34}/requirements.txt +0 -0
  86. {geocif-0.2.32 → geocif-0.2.34}/setup.cfg +0 -0
  87. {geocif-0.2.32 → geocif-0.2.34}/tests/test_geocif.py +0 -0
--- geocif-0.2.32/geocif.egg-info/PKG-INFO
+++ geocif-0.2.34/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.2.32
+Version: 0.2.34
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
--- geocif-0.2.32/geocif/cei/indices.py
+++ geocif-0.2.34/geocif/cei/indices.py
@@ -94,7 +94,7 @@ def standardize_dataframe(df: pd.DataFrame, vi_var: str) -> pd.DataFrame:
     if df[vi_var].max() > 1:
        df[vi_var] = (df[vi_var] - 50) / 200
 
-    # Exclude seasons before 2001 if that's your logic
+    # HACK Exclude seasons before 2001
     df = df[df["Season"] >= 2001]
 
     return df
@@ -507,7 +507,7 @@ class CEIs:
         if not self.redo:
             # If harvest_year is older than last year and file exists, skip
             if (self.harvest_year < (current_year - 1)) and cei_file.is_file():
-                logger.info("CEI file exists and year is old. Skipping: %s", cei_file)
+                logger.info(f"CEI file exists, skipping: {cei_file}")
                 return None
 
         return intermediate_file
--- geocif-0.2.32/geocif/geocif.py
+++ geocif-0.2.34/geocif/geocif.py
@@ -804,14 +804,6 @@ class Geocif:
             mask_train = self.df_train["Region_ID"] == region
             mask_test = self.df_test["Region_ID"] == region
 
-            if self.cluster_strategy == "individual":
-                region_name = self.df_train["Region"].unique()[idx]
-                pbar.set_description(f"Fit/Predict for {region_name}")
-                pbar.update()
-            elif self.cluster_strategy in ["auto_detect", "single"]:
-                pbar.set_description(f"Fit/Predict for group {idx + 1}")
-                pbar.update()
-
             common_columns = (
                 [self.target, self.target_class]
                 + self.statistics_columns
@@ -844,6 +836,14 @@
             self.y_train = df_region_train[self.target_column]
 
             self.apply_feature_selector(region, dir_output)
+
+            if self.cluster_strategy == "individual":
+                region_name = self.df_train["Region"].unique()[idx]
+                pbar.set_description(f"Fit/Predict for {region_name}")
+                pbar.update()
+            elif self.cluster_strategy in ["auto_detect", "single"]:
+                pbar.set_description(f"Fit/Predict for group {idx + 1}")
+                pbar.update()
             self.train_model(df_region_train, dir_output, scaler)
 
             """ Predict """
@@ -1043,17 +1043,27 @@ class Geocif:
 
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
-                df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
+                df,
+                self.all_seasons_with_yield,
+                self.number_median_years,
+                "Area (ha)"
             )
 
         if self.lag_yield_as_feature:
             df = fe.compute_lag_yield(
-                df, self.all_seasons_with_yield, self.number_lag_years, self.target
+                df,
+                self.all_seasons_with_yield,
+                self.forecast_season,
+                self.number_lag_years,
+                self.target
             )
 
         if self.analogous_year_yield_as_feature:
             df = fe.compute_analogous_yield(
-                df, self.all_seasons_with_yield, self.number_median_years, self.target
+                df,
+                self.all_seasons_with_yield,
+                self.number_median_years,
+                self.target
             )
 
         # Create Region_ID column based on Region column category code
@@ -1361,6 +1371,13 @@ class Geocif:
             self.df_inputs[self.target].notna()
         ]["Harvest Year"].unique()
 
+        # Exclude the current forecast season from all_seasons_with_yield
+        self.all_seasons_with_yield = [
+            season
+            for season in self.all_seasons_with_yield
+            if season != self.forecast_season
+        ]
+
        if self.method.endswith("_r"):
            if self.forecast_season == self.today_year:
                mask = self.df_inputs["Harvest Year"] == self.forecast_season
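Together with the compute_lag_yield and compute_closest_years changes further down, the new block above is a leakage guard: the season being forecast no longer contributes its own yield to lag or analogous-year features. A minimal sketch with made-up years (variable names mirror the diff; this is not the packaged code):

import numpy as np

# The season being forecast is removed both from the global pool of seasons
# with yield and from each region's unique years.
forecast_season = 2022
all_seasons_with_yield = [2018, 2019, 2020, 2021, 2022]

# geocif.py: drop the forecast season up front ...
all_seasons_with_yield = [s for s in all_seasons_with_yield if s != forecast_season]

# ... and pass forecast_season into compute_lag_yield, which applies the same
# filter per region before looking up lagged yields.
unique_years = np.array([2019, 2020, 2021, 2022])
unique_years = unique_years[unique_years != forecast_season]

print(all_seasons_with_yield)  # [2018, 2019, 2020, 2021]
print(unique_years)            # [2019 2020 2021]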
--- geocif-0.2.32/geocif/ml/correlations.py
+++ geocif-0.2.34/geocif/ml/correlations.py
@@ -295,16 +295,13 @@ def all_correlated_feature_by_time(df, **kwargs):
             df_tmp2.loc[idx, "Type"] = combined_dict[row[0]][0]
 
         # Compute median of each CEI and sort the dataframe based on the absolute value of the median
-        try:
-            dict_best_cei[region_id] = (
-                df_tmp2.groupby("Type")
-                .max()
-                .reset_index()
-                .sort_values("Value", ascending=False)["Metric"]
-                .values
-            )
-        except:
-            breakpoint()
+        dict_best_cei[region_id] = (
+            df_tmp2.groupby("Type")
+            .max()
+            .reset_index()
+            .sort_values("Value", ascending=False)["Metric"]
+            .values
+        )
 
         kwargs["region_id"] = region_id
         _region_names = ", ".join([str(x) for x in group['Region'].unique()])
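With the try/except gone, the chained expression is the whole story: a column-wise max per CEI Type, then a ranking of Metrics by Value. A toy illustration with invented CEI names and values (note that groupby(...).max() takes each column's max independently, so the retained Metric is the lexicographic max within its Type, not necessarily the one that produced the max Value):

import pandas as pd

# Toy version of the expression that replaced the try/except.
df_tmp2 = pd.DataFrame({
    "Type": ["heat", "heat", "rain"],
    "Metric": ["TXx", "TX90p", "RX5day"],
    "Value": [0.62, 0.48, 0.55],
})
best = (
    df_tmp2.groupby("Type")
    .max()
    .reset_index()
    .sort_values("Value", ascending=False)["Metric"]
    .values
)
print(best)  # ['TXx' 'RX5day'] -- one ranked Metric per Type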
--- geocif-0.2.32/geocif/ml/embedding.py
+++ geocif-0.2.34/geocif/ml/embedding.py
@@ -32,10 +32,7 @@ def _compute_correlations(X, y):
         f_series = X[feature]
 
         # Ignore NaN values in either y or f_series
-        try:
-            mask = ~(np.isnan(y) | np.isnan(f_series))
-        except:
-            breakpoint()
+        mask = ~(np.isnan(y) | np.isnan(f_series))
         y_filtered = y[mask]
         f_series_filtered = f_series[mask]
 
--- geocif-0.2.32/geocif/ml/feature_engineering.py
+++ geocif-0.2.34/geocif/ml/feature_engineering.py
@@ -39,23 +39,32 @@ def compute_last_year_yield(df, target_col="Yield (tn per ha)"):
 
     return df
 
-def compute_closest_years(all_years, harvest_year, number_lag_years):
+def compute_closest_years(all_years, harvest_year, number_lag_years, only_historic=False):
     """
     Finds the historical years closest to a given harvest year,
-    excluding any future year (harvest_year itself and beyond).
+    excluding any future year (harvest_year itself and beyond) based on the only_historic flag.
 
     Args:
         all_years (array-like): List or array of all years to consider.
         harvest_year (int): The year from which to compute distance.
         number_lag_years (int): Number of closest years to return.
+        only_historic (bool): If True, only consider years before the harvest year.
 
     Returns:
         list: The historical years closest to the given harvest year.
         Returns an empty list if no historical years exist.
     """
     # Exclude the harvest year before computation to simplify logic
-    filtered_years = [year for year in all_years if year != harvest_year]
+    if only_historic:
+        filtered_years = [year for year in all_years if year < harvest_year]
+    else:
+        filtered_years = [year for year in all_years if year != harvest_year]
+
+    # If no historical years exist, return an empty list
+    if not filtered_years:
+        return []
 
+    # Sort the years based on their absolute difference from the harvest year
     closest_years = np.array(filtered_years)[
         np.argsort(np.abs(np.array(filtered_years) - harvest_year))[:number_lag_years]
     ]
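A standalone sketch of the new only_historic switch, reimplemented here with invented years rather than imported from the package: without the flag, the nearest neighbors can include seasons after the harvest year; with it, only strictly earlier seasons qualify.

import numpy as np

# Minimal reimplementation of the selection logic shown in the hunk above.
def closest_years(all_years, harvest_year, number_lag_years, only_historic=False):
    if only_historic:
        filtered = [y for y in all_years if y < harvest_year]   # strictly earlier seasons
    else:
        filtered = [y for y in all_years if y != harvest_year]  # earlier and later seasons
    if not filtered:
        return []
    order = np.argsort(np.abs(np.array(filtered) - harvest_year))
    return [int(v) for v in np.array(filtered)[order[:number_lag_years]]]

years = [2016, 2017, 2018, 2019, 2020]
print(closest_years(years, 2019, 2))                      # [2018, 2020] -- 2020 leaks a later season
print(closest_years(years, 2019, 2, only_historic=True))  # [2018, 2017] -- historical only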
@@ -150,7 +159,7 @@ def compute_user_median_statistics(df, user_years, target_col="Yield (tn per ha)"):
 
 
 def compute_lag_yield(
-    df, all_seasons_with_yield, number_lag_years, target_col="Yield (tn per ha)"
+    df, all_seasons_with_yield, forecast_season, number_lag_years, target_col="Yield (tn per ha)"
 ):
     # For the number of years specified in self.number_lag_years, add the yield of that number of years
     # ago to the dataframe
@@ -167,9 +176,12 @@ def compute_lag_yield(
         if group[target_col].isnull().all():
             continue
 
+        # Exclude the forecast season from the unique years
+        unique_years = unique_years[unique_years != forecast_season]
+
         for harvest_year in unique_years:
             closest_years = compute_closest_years(
-                all_seasons_with_yield, harvest_year, number_lag_years
+                all_seasons_with_yield, harvest_year, number_lag_years, only_historic=True
             )
 
             # For each year in the closest years, add the yield to the dataframe as a new column
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita.py
@@ -0,0 +1,103 @@
+import ee
+import geemap
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime
+
+# 1. Initialize Earth Engine
+ee.Initialize(project="ee-rit")
+
+# 2. Load your study region
+region = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+
+# 3. Load your cropland-mask raster (1=crop, other values = non-crop)
+crop_mask = ee.Image('projects/ee-rit/assets/shabari_maize').rename('cropMask')
+
+# 4. Define & merge the two HLS collections, selecting only B3,B4,B5 and filtering clouds
+LAND_HLS = "NASA/HLS/HLSL30/v002"
+SENT_HLS = "NASA/HLS/HLSS30/v002"
+CLOUD_PROP = 'CLOUD_COVERAGE'
+CLOUD_MAX = 30
+BANDS = ['B3', 'B4', 'B5']  # green, red, nir
+
+hls = (
+    ee.ImageCollection(LAND_HLS)
+    .filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX))
+    .select(BANDS)
+).merge(
+    ee.ImageCollection(SENT_HLS)
+    .filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX))
+    .select(BANDS)
+)
+
+# 5. Compute annual median GCVI & NDVI over Apr–Aug, masking only pixels == 1
+def annual_stats(year):
+    year = ee.Number(year)
+    start = ee.Date.fromYMD(year, 4, 1)
+    end = ee.Date.fromYMD(year, 8, 31)
+
+    # 5.1 median composite
+    img = hls.filterDate(start, end).median()
+
+    # 5.2 compute indices
+    gcvi = img.expression(
+        '(nir/green) - 1',
+        {'nir': img.select('B5'),
+         'green': img.select('B3')}
+    ).rename('GCVI')
+    ndvi = img.normalizedDifference(['B5', 'B4']).rename('NDVI')
+    indices = gcvi.addBands(ndvi)
+
+    # 5.3 build binary mask (pixel==1)
+    mask_binary = crop_mask.eq(1)
+
+    # 5.4 apply mask & reduce to median
+    masked = indices.updateMask(mask_binary)
+    stats = masked.reduceRegion(
+        reducer=ee.Reducer.median(),
+        geometry=region.geometry(),
+        scale=30,
+        maxPixels=1e13
+    )
+
+    return ee.Feature(None, {
+        'year': year,
+        'median_GCVI': stats.get('GCVI'),
+        'median_NDVI': stats.get('NDVI')
+    })
+
+# 6. Build the collection & pull into pandas
+years = ee.List.sequence(2013, datetime.datetime.now().year)
+fc = ee.FeatureCollection(years.map(annual_stats))
+
+data = fc.getInfo()['features']
+df = pd.DataFrame([f['properties'] for f in data])
+df['year'] = df['year'].astype(int)
+
+# 7. Coerce to numeric and drop years without data
+df['median_GCVI'] = pd.to_numeric(df['median_GCVI'], errors='coerce')
+df['median_NDVI'] = pd.to_numeric(df['median_NDVI'], errors='coerce')
+df_valid = df.dropna(subset=['median_GCVI', 'median_NDVI']).sort_values('year')
+
+# 8. Export to Drive
+ee.batch.Export.table.toDrive(
+    collection=fc,
+    description='HLS_CropMask_Medians_AprAug',
+    folder='EarthEngineOutputs',
+    fileNamePrefix='hls_crop_medians_apr_aug',
+    fileFormat='CSV'
+).start()
+
+# 9. Plot bar charts
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)
+ax1.bar(df_valid['year'], df_valid['median_GCVI'])
+ax1.set_title('Median GCVI by Year (Apr–Aug)')
+ax1.set_ylabel('GCVI')
+
+ax2.bar(df_valid['year'], df_valid['median_NDVI'])
+ax2.set_title('Median NDVI by Year (Apr–Aug)')
+ax2.set_ylabel('NDVI')
+ax2.set_xlabel('Year')
+
+plt.tight_layout()
+plt.show()
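For reference, both indices in this new script reduce to simple band arithmetic on the HLS composite. A quick check with invented reflectance values (band roles as in the script: B5 = NIR, B4 = red, B3 = green):

# Sanity check of the two index formulas; the reflectance values are made up.
nir, red, green = 0.40, 0.08, 0.10
gcvi = nir / green - 1            # matches img.expression('(nir/green) - 1', ...)
ndvi = (nir - red) / (nir + red)  # matches normalizedDifference(['B5', 'B4'])
print(round(gcvi, 2), round(ndvi, 2))  # 3.0 0.67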
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita_v2.py
@@ -0,0 +1,80 @@
+import ee
+import pandas as pd
+import matplotlib.pyplot as plt
+import datetime
+import seaborn as sns
+
+# Initialize EE
+ee.Initialize(project='ee-rit')
+
+# 1. Load your region polygons (ADM3 level)
+regions = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+
+# 2. Load your crop mask and HLS collections
+crop_mask = ee.Image('projects/ee-rit/assets/shabari_maize').eq(1)
+LAND_HLS = "NASA/HLS/HLSL30/v002"
+SENT_HLS = "NASA/HLS/HLSS30/v002"
+CLOUD_PROP = 'CLOUD_COVERAGE'
+CLOUD_MAX = 30
+BANDS = ['B3', 'B4', 'B5']  # green, red, nir
+
+hls = (
+    ee.ImageCollection(LAND_HLS).filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX)).select(BANDS)
+).merge(
+    ee.ImageCollection(SENT_HLS).filter(ee.Filter.lt(CLOUD_PROP, CLOUD_MAX)).select(BANDS)
+)
+
+# 3. Function: for a given region feature, return a FC of (year, ADM3_EN, max_NDVI)
+years = ee.List.sequence(2013, datetime.datetime.now().year)
+def stats_for_region(feat):
+    def stats_for_year(y):
+        y = ee.Number(y)
+        # composite and NDVI
+        img = hls.filterDate(ee.Date.fromYMD(y, 4, 1), ee.Date.fromYMD(y, 8, 31)).median()
+        ndvi = img.normalizedDifference(['B5', 'B4']).rename('NDVI')
+        # mask to cropland
+        ndvi = ndvi.updateMask(crop_mask)
+        # max over this feature
+        max_ndvi = ndvi.reduceRegion(
+            ee.Reducer.max(),
+            geometry=feat.geometry(),
+            scale=30,
+            maxPixels=1e13
+        ).get('NDVI')
+        return ee.Feature(None, {
+            'ADM3_EN': feat.get('ADM3_EN'),
+            'year': y,
+            'max_NDVI': max_ndvi
+        })
+    return ee.FeatureCollection(years.map(stats_for_year))
+
+# 4. Build the full collection and fetch
+fc = regions.map(stats_for_region).flatten()
+data = fc.getInfo()['features']
+df = pd.DataFrame([f['properties'] for f in data])
+df['year'] = df['year'].astype(int)
+df['max_NDVI'] = pd.to_numeric(df['max_NDVI'], errors='coerce')
+
+# 5. Pivot to matrix: rows=ADM3_EN, cols=year
+mat = df.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
+
+# 6. Plot heatmap
+
+plt.figure(figsize=(12, 10))
+
+# Draw heatmap with annotations, two decimal places
+sns.heatmap(
+    mat,
+    annot=True,
+    fmt=".2f",
+    cbar_kws={'label': 'Max NDVI'},
+    linewidths=0.5,   # optional: grid lines between cells
+    linecolor='gray'  # optional: grid line color
+)
+
+plt.title('Apr–Aug Max NDVI by ADM3_EN and Year')
+plt.xlabel('')
+plt.ylabel('Woreda')
+plt.xticks(rotation=45, ha='right')
+plt.tight_layout()
+plt.show()
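(Note: the released file pivots on a 'mean_NDVI' column and labels the heatmap "Mean NDVI", but the only column the script actually builds is 'max_NDVI'; the pivot, colorbar label, and title above are corrected accordingly.) The pivot that feeds the heatmap is worth seeing on toy data: long-format (ADM3_EN, year, max_NDVI) records become a woreda-by-year matrix. The woreda names below are real ADM3 units from the area; the NDVI values are invented:

import pandas as pd

# Toy version of the pivot feeding sns.heatmap.
df = pd.DataFrame({
    'ADM3_EN': ['Humbo', 'Humbo', 'Ofa', 'Ofa'],
    'year': [2020, 2021, 2020, 2021],
    'max_NDVI': [0.61, 0.58, 0.72, 0.69],
})
mat = df.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
print(mat)
# year      2020  2021
# ADM3_EN
# Humbo     0.61  0.58
# Ofa       0.72  0.69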
--- /dev/null
+++ geocif-0.2.34/geocif/playground/wolayita_v3.py
@@ -0,0 +1,219 @@
+import ee
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import datetime
+from tqdm import tqdm
+import math
+
+# -----------------------------------------------------------------------------
+# SETTINGS: choose one
+# -----------------------------------------------------------------------------
+MODE = "sample"  # "sample" = raw pixel draws; "percentile" = server-side max-NDVI heatmap
+
+YEARS = list(range(2013, datetime.datetime.now().year + 1))
+MONTHS = [4, 5, 6, 7, 8]  # Apr–Aug (used only in sample mode)
+SAMPLES_PER_COMBO = 500  # for MODE="sample"
+# -----------------------------------------------------------------------------
+
+# -----------------------------------------------------------------------------
+# 1. Initialize & Constants
+# -----------------------------------------------------------------------------
+ee.Initialize(project='ee-rit')
+
+REGIONS = ee.FeatureCollection('projects/ee-rit/assets/wolayita')
+CROP_MASK = ee.Image('projects/ee-rit/assets/shabari_maize').eq(1)
+HLS = (
+    ee.ImageCollection("NASA/HLS/HLSL30/v002")
+    .filter(ee.Filter.lt('CLOUD_COVERAGE', 100))
+    .select(['B3', 'B4', 'B5'])
+).merge(
+    ee.ImageCollection("NASA/HLS/HLSS30/v002")
+    .filter(ee.Filter.lt('CLOUD_COVERAGE', 100))
+    .select(['B3', 'B4', 'B5'])
+)
+
+# -----------------------------------------------------------------------------
+# 2. Inline yield values (tn per ha) for ADM3_EN from 2004 to 2021
+# -----------------------------------------------------------------------------
+yield_dict = {
+    'Bolossa Bonibe': {2008: 12.10, 2009: 15.45, 2010: 10.12, 2011: 15.68, 2012: 10.94,
+                       2013: 24.82, 2014: 20.33, 2015: 12.20, 2017: 11.47, 2021: 12.53},
+    'Bolossa Sore': {2004: 17.54, 2005: 18.24, 2006: 17.88, 2007: 20.97, 2008: 12.10,
+                     2009: 18.68, 2010: 18.55, 2012: 16.83, 2013: 18.95, 2014: 20.75,
+                     2015: 19.92, 2016: 12.92, 2017: 11.24, 2018: 19.96, 2019: 20.21,
+                     2020: 28.06, 2021: 27.20},
+    'Damot Gale': {2004: 13.81, 2005: 15.88, 2006: 15.57, 2007: 24.63, 2008: 10.07,
+                   2009: 17.67, 2010: 14.72, 2011: 6.03, 2012: 23.06, 2013: 23.52,
+                   2014: 25.07, 2015: 22.12, 2016: 27.19, 2017: 20.45, 2018: 19.96,
+                   2019: 20.20, 2020: 28.06, 2021: 29.77},
+    'Damot Pulasa': {2008: 11.55, 2009: 14.44, 2010: 11.92, 2011: 12.21, 2012: 12.91,
+                     2013: 26.50, 2014: 24.08, 2015: 12.99, 2016: 7.73, 2017: 20.45,
+                     2018: 19.96, 2019: 39.52, 2020: 28.06, 2021: 9.32},
+    'Damot Sore': {2008: 8.51, 2009: 12.36, 2010: 12.23, 2011: 21.52, 2012: 18.03,
+                   2013: 16.06, 2014: 17.79, 2015: 26.63, 2016: 17.36, 2017: 20.45,
+                   2018: 8.49, 2019: 21.28, 2021: 14.99},
+    'Damot Woyide': {2004: 18.89, 2005: 12.35, 2006: 12.11, 2007: 14.42, 2008: 16.56,
+                     2009: 19.32, 2010: 8.98, 2012: 22.04, 2013: 23.37, 2014: 21.44,
+                     2015: 23.79, 2016: 22.25, 2017: 19.60, 2018: 19.87, 2019: 15.12,
+                     2020: 28.06, 2021: 14.06},
+    'Deguna Fanigo': {2008: 10.60, 2009: 16.29, 2010: 21.87, 2011: 18.46, 2012: 17.38,
+                      2013: 27.01, 2014: 16.38, 2015: 17.25, 2016: 22.06, 2017: 27.30,
+                      2018: 28.83, 2019: 14.99, 2020: 34.81, 2021: 30.95},
+    'Humbo': {2004: 23.50, 2005: 16.38, 2006: 16.06, 2007: 13.51, 2008: 10.33,
+              2009: 17.99, 2010: 14.83, 2011: 7.56, 2012: 27.13, 2013: 16.62,
+              2014: 13.70, 2015: 14.28, 2016: 19.61, 2017: 17.82, 2018: 27.96,
+              2019: 16.03, 2020: 12.45, 2021: 23.16},
+    'Kindo Didaye': {2008: 11.13, 2009: 18.47, 2010: 10.92, 2011: 12.21, 2012: 27.17,
+                     2015: 20.22, 2016: 16.98, 2018: 17.68},
+    'Kindo Koyisha': {2004: 14.51, 2005: 13.04, 2006: 12.78, 2007: 23.89, 2008: 12.10,
+                      2009: 12.77, 2010: 19.19, 2012: 15.28, 2013: 19.41, 2014: 20.47,
+                      2015: 20.61, 2016: 13.79, 2017: 20.26, 2018: 22.40, 2019: 22.49,
+                      2020: 22.96, 2021: 24.39},
+    'Ofa': {2004: 27.61, 2005: 10.39, 2006: 10.19, 2007: 19.48, 2009: 5.62,
+            2010: 17.77, 2011: 16.99, 2012: 19.59, 2013: 17.59, 2014: 9.26,
+            2015: 25.25, 2016: 23.82, 2017: 14.18, 2018: 19.96, 2019: 20.32,
+            2021: 20.47},
+    'Sodo Zuriya': {2004: 6.39, 2005: 7.75, 2006: 7.59, 2007: 11.11, 2008: 18.66,
+                    2009: 17.86, 2010: 14.61, 2011: 20.24, 2012: 15.88, 2013: 22.42,
+                    2014: 22.62, 2015: 33.23, 2016: 21.68, 2017: 20.63, 2018: 11.16,
+                    2019: 19.82, 2020: 16.35, 2021: 19.69}
+}
+df_yield = (
+    pd.DataFrame.from_dict(yield_dict, orient='index')
+    .reset_index().rename(columns={'index': 'ADM3_EN'})
+    .melt(id_vars='ADM3_EN', var_name='year', value_name='yield')
+    .dropna(subset=['yield'])
+)
+df_yield['year'] = df_yield['year'].astype(int)
+
+# -----------------------------------------------------------------------------
+# 2A. MODE="sample": raw-pixel sampling
+# -----------------------------------------------------------------------------
+def fetch_ndvi_samples(regions_fc, hls_ic, crop_mask, years, months, n_samples):
+    rows = []
+    feats = regions_fc.getInfo().get('features', [])
+    for feat in feats:
+        name = feat['properties'].get('ADM3_EN')
+        geom = ee.Feature(feat).geometry()
+        for y in tqdm(years, desc="years", leave=False):
+            for m in tqdm(months, desc="months", leave=False):
+                coll = hls_ic.filterDate(
+                    ee.Date.fromYMD(y, m, 1),
+                    ee.Date.fromYMD(y, m, 1).advance(1, 'month')
+                )
+                if coll.size().getInfo() == 0:
+                    continue
+                ndvi = (
+                    coll.median()
+                    .normalizedDifference(['B5', 'B4'])
+                    .rename('NDVI')
+                    .updateMask(crop_mask)
+                )
+                samples = ndvi.sample(
+                    region=geom,
+                    scale=30,
+                    numPixels=n_samples,
+                    seed=42
+                ).getInfo().get('features', [])
+                for s in samples:
+                    rows.append({
+                        'ADM3_EN': name,
+                        'year': y,
+                        'month': m,
+                        'ndvi': s['properties'].get('NDVI')
+                    })
+    return pd.DataFrame(rows, columns=['ADM3_EN', 'year', 'month', 'ndvi'])
+
+# -----------------------------------------------------------------------------
+# 2B. MODE="percentile": server-side max-NDVI heatmap
+# -----------------------------------------------------------------------------
+def fetch_ndvi_max(regions_fc, hls_ic, crop_mask, years):
+    rows = []
+    for y in tqdm(years, desc="years"):
+        img = hls_ic.filterDate(f'{y}-04-01', f'{y}-08-31').median()
+        ndvi = (
+            img.normalizedDifference(['B5', 'B4'])
+            .rename('NDVI')
+            .updateMask(crop_mask)
+        )
+        stats_fc = ndvi.reduceRegions(
+            collection=regions_fc,
+            reducer=ee.Reducer.max().setOutputs(['max_NDVI']),
+            scale=30,
+            tileScale=8
+        ).map(lambda f: f.set('year', y))
+        for feat in stats_fc.getInfo().get('features', []):
+            props = feat['properties']
+            val = props.get('max_NDVI')
+            max_ndvi = np.nan if val is None else float(val)
+            rows.append({
+                'ADM3_EN': props.get('ADM3_EN'),
+                'year': int(props.get('year')),
+                'max_NDVI': max_ndvi
+            })
+    return pd.DataFrame(rows, columns=['ADM3_EN', 'year', 'max_NDVI'])
+
+# -----------------------------------------------------------------------------
+# 3. Run selected mode and plot
+# -----------------------------------------------------------------------------
+if MODE == "sample":
+    df = fetch_ndvi_samples(REGIONS, HLS, CROP_MASK, YEARS, MONTHS, SAMPLES_PER_COMBO)
+    if df.empty:
+        raise ValueError("No samples -- check your mask/region.")
+    sns.catplot(
+        x='month', y='ndvi', col='year',
+        data=df.dropna(subset=['ndvi']), kind='box',
+        col_wrap=4, sharey=True,
+        height=3.5, aspect=1
+    ).fig.suptitle("Monthly NDVI Distributions by Year and Woreda", y=1.02)
+    plt.show()
+
+elif MODE == "percentile":
+    # 3.1 Fetch max-NDVI
+    dfm = fetch_ndvi_max(REGIONS, HLS, CROP_MASK, YEARS)
+    if dfm.empty:
+        raise ValueError("No max-NDVI values -- check your mask/region.")
+
+    # 3.2 Heatmap: Max NDVI
+    mat_ndvi = dfm.pivot(index='ADM3_EN', columns='year', values='max_NDVI')
+    plt.figure(figsize=(12, 10))
+    sns.heatmap(mat_ndvi, annot=True, fmt=".2f", linewidths=0.5,
+                linecolor='gray', cbar_kws={'label': 'Max NDVI'})
+    plt.title('Apr–Aug Max NDVI by Woreda and Year')
+    plt.xlabel('Year'); plt.ylabel('Woreda'); plt.xticks(rotation=45)
+    plt.tight_layout(); plt.show()
+
+    # 3.3 Heatmap: Yield
+    mat_yield = df_yield.pivot(index='ADM3_EN', columns='year', values='yield')
+    plt.figure(figsize=(12, 10))
+    sns.heatmap(mat_yield, annot=True, fmt=".2f", linewidths=0.5,
+                linecolor='gray', cbar_kws={'label': 'Yield (tn/ha)'})
+    plt.title('Crop Yield by Woreda and Year')
+    plt.xlabel('Year'); plt.ylabel('Woreda'); plt.xticks(rotation=45)
+    plt.tight_layout(); plt.show()
+
+    # 3.4 Scatter: one subplot per region (max 5 per row)
+    df_merge = pd.merge(df_yield, dfm, on=['ADM3_EN', 'year'], how='outer')
+    regions = sorted(df_merge['ADM3_EN'].unique())
+    n_regions = len(regions)
+    ncols = 5
+    nrows = math.ceil(n_regions / ncols)
+    fig, axes = plt.subplots(nrows, ncols,
+                             figsize=(5*ncols, 4*nrows),
+                             sharex=True, sharey=True)
+    axes = axes.flatten()
+    for ax, region in zip(axes, regions):
+        sub = df_merge[df_merge['ADM3_EN'] == region]
+        sns.scatterplot(data=sub, x='yield', y='max_NDVI', ax=ax)
+        ax.set_title(region)
+        ax.set_xlabel('Yield (tn/ha)')
+        ax.set_ylabel('Max NDVI')
+    for ax in axes[len(regions):]:
+        ax.set_visible(False)
+    plt.tight_layout()
+    plt.show()
+
+else:
+    raise ValueError(f"Unknown MODE={MODE!r}")
--- geocif-0.2.32/geocif/viz/plot.py
+++ geocif-0.2.34/geocif/viz/plot.py
@@ -395,23 +395,16 @@ def plot_df_shpfile(
             )
             _name_country.append(cntr.replace(" ", "_").lower())
 
-            # Hack
-            if _name_country[0] == "russian_federation":
-                extent = [20, 80, 40, 80]
-            else:
-                extent = rgeo.get_country_lat_lon_extent(
-                    _name_country, buffer=1.0
-                )  # left, right, bottom, top
-
-            # Hack: Add space to the top for adding title
-            extent[3] = extent[3] + 2
-            # Add some space to the bottom for adding legend and colorbar
-            extent[2] = extent[2] - 3
-
-            try:
-                ax.set_extent(extent)
-            except:
-                breakpoint()
+            extent = rgeo.get_country_lat_lon_extent(
+                _name_country, buffer=1.0
+            )  # left, right, bottom, top
+
+            # Hack: Add space to the top for adding title
+            extent[3] = extent[3] + 2
+            # Add some space to the bottom for adding legend and colorbar
+            extent[2] = extent[2] - 3
+
+            ax.set_extent(extent)
         elif name_country == "world":
             ax.add_feature(
                 cartopy.feature.LAND.with_scale("50m"), color="white"
--- /dev/null
+++ geocif-0.2.34/geocif/viz/viz_ml.py
@@ -0,0 +1,95 @@
+import sqlite3
+import pandas as pd
+import panel as pn
+import matplotlib.pyplot as plt
+
+# Enable Panel's Matplotlib support
+pn.extension()
+
+# Connect to the SQLite database
+conn = sqlite3.connect(r'D:\Users\ritvik\projects\GEOGLAM\Output\ml\db\presentation_v2.db')
+
+# Find every table except config*
+all_tables = pd.read_sql_query(
+    "SELECT name FROM sqlite_master WHERE type='table';",
+    conn
+)['name'].tolist()
+data_tables = [t for t in all_tables if not t.lower().startswith('config')]
+
+# Columns we need in each table
+required = {
+    'Country',
+    'Crop',
+    'Harvest Year',
+    'Observed Yield (tn per ha)',
+    'Predicted Yield (tn per ha)'
+}
+
+frames = []
+for tbl in data_tables:
+    cols = pd.read_sql_query(f"PRAGMA table_info('{tbl}');", conn)['name'].tolist()
+    if required.issubset(cols):
+        df = pd.read_sql_query(f"""
+            SELECT
+                Country,
+                Crop,
+                [Harvest Year] AS year,
+                [Observed Yield (tn per ha)] AS observed,
+                [Predicted Yield (tn per ha)] AS predicted
+            FROM "{tbl}"
+        """, conn)
+        frames.append(df)
+
+if not frames:
+    raise ValueError("No tables found with the required schema!")
+
+df_all = pd.concat(frames, ignore_index=True)
+print(df_all)
+conn.close()
+
+# 3. Build Panel widgets
+country_select = pn.widgets.Select(
+    name='Country',
+    options=sorted(df_all['Country'].unique())
+)
+crop_select = pn.widgets.Select(name='Crop', options=[])
+year_select = pn.widgets.Select(
+    name='Year',
+    options=sorted(df_all['year'].astype(str).unique())
+)
+
+# When Country changes, update Crop list
+@pn.depends(country_select.param.value, watch=True)
+def update_crops(country):
+    crops = sorted(df_all[df_all['Country'] == country]['Crop'].unique())
+    crop_select.options = crops
+    if crops:
+        crop_select.value = crops[0]
+
+update_crops(country_select.value)
+
+# 4. Scatter plot: Observed vs Predicted
+@pn.depends(
+    country_select.param.value,
+    crop_select.param.value,
+    year_select.param.value
+)
+def scatter_plot(country, crop, year):
+    year = int(year)
+    # Filter to the selected country/crop/year (year column cast to int)
+    df = df_all[
+        (df_all['Country'] == country)
+        & (df_all['Crop'] == crop)
+        & (df_all['year'].astype(int) == year)
+    ]
+    fig, ax = plt.subplots()
+    ax.scatter(df['observed'], df['predicted'])
+    ax.set_xlabel('Observed Yield (tn per ha)')
+    ax.set_ylabel('Predicted Yield (tn per ha)')
+    ax.set_title(f'{crop} in {country}, {year}')
+    return fig
+
+# 5. Assemble & serve
+dashboard = pn.Column(
+    pn.Row(country_select, crop_select, year_select),
+    scatter_plot
+)
+
+dashboard.servable()
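(The released file has only a comment where the pn.extension() call should be; it is restored above so the Matplotlib pane renders.) Since the module ends with .servable(), the usual way to run it is `panel serve viz_ml.py` from a shell. A sketch of previewing it from a Python session instead (assumes the geocif package is importable and the hard-coded SQLite path above resolves on your machine):

from geocif.viz import viz_ml  # importing the module builds `dashboard`

viz_ml.dashboard.show()  # starts a local Bokeh server and opens a browser tab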
--- geocif-0.2.32/PKG-INFO
+++ geocif-0.2.34/geocif.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: geocif
-Version: 0.2.32
+Version: 0.2.34
 Summary: Models to visualize and forecast crop conditions and yields
 Home-page: https://ritviksahajpal.github.io/yield_forecasting/
 Author: Ritvik Sahajpal
--- geocif-0.2.32/geocif.egg-info/SOURCES.txt
+++ geocif-0.2.34/geocif.egg-info/SOURCES.txt
@@ -72,11 +72,15 @@ geocif/playground/tmp2.py
 geocif/playground/tmp3.py
 geocif/playground/tmp4.py
 geocif/playground/tmp5.py
+geocif/playground/wolayita.py
 geocif/playground/wolayita_maize_mask.py
+geocif/playground/wolayita_v2.py
+geocif/playground/wolayita_v3.py
 geocif/risk/__init__.py
 geocif/risk/impact_assessment.py
 geocif/viz/__init__.py
 geocif/viz/gt.py
 geocif/viz/plot.py
 geocif/viz/tmp.py
+geocif/viz/viz_ml.py
 tests/test_geocif.py
--- geocif-0.2.32/setup.py
+++ geocif-0.2.34/setup.py
@@ -50,6 +50,6 @@ setup(
     test_suite="tests",
     tests_require=test_requirements,
     url="https://ritviksahajpal.github.io/yield_forecasting/",
-    version="0.2.32",
+    version="0.2.34",
     zip_safe=False,
 )
--- geocif-0.2.32/geocif/ml/feature_selection.py
+++ geocif-0.2.34/geocif/ml/feature_selection.py
@@ -253,6 +253,7 @@ def select_features(
     elif method == "Leshy":
         import arfs.feature_selection.allrelevant as arfsgroot
         from catboost import CatBoostRegressor
+
         model = CatBoostRegressor(n_estimators=350, verbose=0, use_best_model=False)
         sel = arfsgroot.Leshy(
             model,
@@ -264,7 +265,6 @@ def select_features(
         )
         sel.fit(X_clean, y)
         selected = sel.get_feature_names_out()
-
     elif method == "PowerShap":
         from powershap import PowerShap
         from catboost import CatBoostRegressor