geocif 0.1.70__tar.gz → 0.1.72__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.70/geocif.egg-info → geocif-0.1.72}/PKG-INFO +5 -1
  2. {geocif-0.1.70 → geocif-0.1.72}/README.md +4 -0
  3. {geocif-0.1.70 → geocif-0.1.72}/geocif/analysis.py +10 -8
  4. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_angola.py +1 -1
  5. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_madagascar.py +1 -1
  6. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_malawi.py +1 -1
  7. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_mozambique.py +1 -1
  8. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_south_africa.py +1 -1
  9. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_zambia.py +1 -1
  10. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_zimbabwe.py +1 -1
  11. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/aa.py +86 -0
  12. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/tmp.py +196 -3
  13. {geocif-0.1.70 → geocif-0.1.72/geocif.egg-info}/PKG-INFO +5 -1
  14. {geocif-0.1.70 → geocif-0.1.72}/setup.py +1 -1
  15. {geocif-0.1.70 → geocif-0.1.72}/LICENSE +0 -0
  16. {geocif-0.1.70 → geocif-0.1.72}/MANIFEST.in +0 -0
  17. {geocif-0.1.70 → geocif-0.1.72}/geocif/__init__.py +0 -0
  18. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/__init__.py +0 -0
  19. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/geoagmet.py +0 -0
  20. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/plot.py +0 -0
  21. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/utils.py +0 -0
  22. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/__init__.py +0 -0
  23. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/constants.py +0 -0
  24. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/features.py +0 -0
  25. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/geo.py +0 -0
  26. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/geocif.py +0 -0
  27. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/metadata.py +0 -0
  28. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/models.py +0 -0
  29. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/__init__.py +0 -0
  30. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/definitions.py +0 -0
  31. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/indices.py +0 -0
  32. {geocif-0.1.70 → geocif-0.1.72}/geocif/experiments.py +0 -0
  33. {geocif-0.1.70 → geocif-0.1.72}/geocif/geocif.py +0 -0
  34. {geocif-0.1.70 → geocif-0.1.72}/geocif/geocif_runner.py +0 -0
  35. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner.py +0 -0
  36. {geocif-0.1.70 → geocif-0.1.72}/geocif/logger.py +0 -0
  37. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/embedding.py +0 -0
  40. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/feature_selection.py +0 -0
  42. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/outliers.py +0 -0
  43. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/outlook.py +0 -0
  44. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/output.py +0 -0
  45. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/spatial_autocorrelation.py +0 -0
  46. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/stages.py +0 -0
  47. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/stats.py +0 -0
  48. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/trainers.py +0 -0
  49. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/trend.py +0 -0
  50. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/xai.py +0 -0
  51. {geocif-0.1.70 → geocif-0.1.72}/geocif/mm.py +0 -0
  52. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/__init__.py +0 -0
  53. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/area.py +0 -0
  54. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/automl.py +0 -0
  55. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/download_esi.py +0 -0
  56. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/enso.py +0 -0
  57. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/eval.py +0 -0
  58. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/gamtest.py +0 -0
  59. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/gee_access.py +0 -0
  60. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/misc.py +0 -0
  61. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/play_xagg.py +0 -0
  62. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/reg.py +0 -0
  63. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/wolayita_maize_mask.py +0 -0
  71. {geocif-0.1.70 → geocif-0.1.72}/geocif/risk/__init__.py +0 -0
  72. {geocif-0.1.70 → geocif-0.1.72}/geocif/risk/impact_assessment.py +0 -0
  73. {geocif-0.1.70 → geocif-0.1.72}/geocif/utils.py +0 -0
  74. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/__init__.py +0 -0
  75. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/gt.py +0 -0
  76. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/plot.py +0 -0
  77. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.70 → geocif-0.1.72}/requirements.txt +0 -0
  82. {geocif-0.1.70 → geocif-0.1.72}/setup.cfg +0 -0
  83. {geocif-0.1.70 → geocif-0.1.72}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
25
25
 
26
26
 
27
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
28
+
29
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
30
+
27
31
  **Models to visualize and forecast crop conditions and yields**
28
32
 
29
33
 
@@ -5,6 +5,10 @@
5
5
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
6
6
 
7
7
 
8
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
9
+
10
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
11
+
8
12
  **Models to visualize and forecast crop conditions and yields**
9
13
 
10
14
 
@@ -560,12 +560,12 @@ class Geoanalysis:
560
560
  )
561
561
 
562
562
  # --- For computing median yields ---
563
- # Compute median yield for 2014 - 2018
564
- df_median_2014_2018 = (
565
- df_all[df_all["Harvest Year"].between(2014, 2018)]
563
+ # Compute median yield for 2018 - 2022
564
+ df_median_2018_2022 = (
565
+ df_all[df_all["Harvest Year"].between(2018, 2022)]
566
566
  .groupby("Region")["Yield (tn per ha)"]
567
- .median()
568
- .rename(f"Median Yield (tn per ha) (2014-2018)")
567
+ .mean()
568
+ .rename(f"Median Yield (tn per ha) (2018-2022)")
569
569
  .reset_index()
570
570
  )
571
571
 
@@ -573,7 +573,7 @@ class Geoanalysis:
573
573
  df_median_2013_2017 = (
574
574
  df_all[df_all["Harvest Year"].between(2013, 2017)]
575
575
  .groupby("Region")["Yield (tn per ha)"]
576
- .median()
576
+ .mean()
577
577
  .rename("Median Yield (tn per ha) (2013-2017)")
578
578
  .reset_index()
579
579
  )
@@ -581,7 +581,7 @@ class Geoanalysis:
581
581
  # Merge the median yield columns with the % of total production dataframe
582
582
  df_historic = (
583
583
  df_pct
584
- .merge(df_median_2014_2018, on="Region", how="left")
584
+ .merge(df_median_2018_2022, on="Region", how="left")
585
585
  .merge(df_median_2013_2017, on="Region", how="left")
586
586
  )
587
587
 
@@ -885,7 +885,9 @@ class Geoanalysis:
885
885
  # Get the ML section
886
886
  df_ml = self.df_config[self.df_config["Section"] == "ML"]
887
887
 
888
- self.countries = ["malawi"]
888
+ self.countries = ast.literal_eval(
889
+ df_ml[df_ml["Option"] == "countries"]["Value"].values[0]
890
+ )
889
891
  for country in self.countries:
890
892
  df = self.df_config[self.df_config["Section"] == country]
891
893
 
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.9)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.8)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -1,3 +1,89 @@
1
+ import geopandas as gpd
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib as mpl
5
+
6
+ # --- 1. Read data ---
7
+ dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica.shp")
8
+ df = pd.read_csv(r"D:\Users\ritvik\projects\GEOGLAM\geocif_march_2025.csv")
9
+
10
+ # --- 2. Create the new "Country Region" column ---
11
+ dg['Country Region'] = (
12
+ dg.apply(
13
+ lambda row: (
14
+ f"{row['ADMIN0']} {row['ADMIN2']}"
15
+ if pd.notnull(row['ADMIN2'])
16
+ else f"{row['ADMIN0']} {row['ADMIN1']}"
17
+ ),
18
+ axis=1
19
+ )
20
+ .str.lower()
21
+ .str.replace(' ', '_')
22
+ )
23
+
24
+ # --- 3. Merge shapefile with CSV ---
25
+ merged = dg.merge(df, left_on='Country Region', right_on='Country Region', how='right')
26
+
27
+ # --- 4. Rename columns ---
28
+ merged.rename(
29
+ columns={
30
+ '% Anomaly (2013-2017)': '2013_2017',
31
+ '% Anomaly (2018-2022)': '2018_2022'
32
+ },
33
+ inplace=True
34
+ )
35
+
36
+ # Optional: Write out merged shapefile
37
+ merged.to_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica_geocif_march_2025.shp")
38
+
39
+ # --- 5. Plot ---
40
+ fig, ax = plt.subplots(1, 2, figsize=(20, 10))
41
+
42
+ # Reduce horizontal space between subplots
43
+ plt.subplots_adjust(wspace=0.05)
44
+
45
+ # Shared color normalization
46
+ norm = mpl.colors.Normalize(vmin=-40, vmax=40)
47
+
48
+ # Plot the anomaly maps (no country boundaries)
49
+ merged.plot(
50
+ column='2013_2017',
51
+ cmap='BrBG',
52
+ norm=norm,
53
+ ax=ax[0],
54
+ legend=False
55
+ )
56
+ ax[0].set_title('Maize Yield Forecast % Anomaly (2013-2017)')
57
+ ax[0].axis('off')
58
+
59
+ merged.plot(
60
+ column='2018_2022',
61
+ cmap='BrBG',
62
+ norm=norm,
63
+ ax=ax[1],
64
+ legend=False
65
+ )
66
+ ax[1].set_title('Maize Yield Forecast % Anomaly (2018-2022)')
67
+ ax[1].axis('off')
68
+
69
+ # Create a single horizontal colorbar
70
+ sm = mpl.cm.ScalarMappable(norm=norm, cmap='BrBG')
71
+ sm.set_array([])
72
+ cbar = fig.colorbar(
73
+ sm,
74
+ ax=ax.ravel().tolist(),
75
+ orientation='horizontal',
76
+ fraction=0.05,
77
+ pad=0.05,
78
+ extend='both'
79
+ )
80
+ cbar.set_label('% Anomaly')
81
+
82
+ plt.savefig(r"D:\Users\ritvik\projects\GEOGLAM\maize_yield_forecast_anomaly.png", dpi=300)
83
+
84
+
85
+
86
+ breakpoint()
1
87
  from great_tables import GT, html
2
88
  import pandas as pd
3
89
 
@@ -1,13 +1,15 @@
1
1
  import geopandas as gpd
2
2
  import palettable as pal
3
3
  import matplotlib.colors as mcolors
4
-
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import numpy as np
5
7
  import pandas as pd
6
8
  import glob
7
9
  import os
8
10
 
9
11
  # 1. Specify the directory containing your .dta files:
10
- data_dir = r"C:\Users\ritvik\Downloads\maize_yield (2)\maize_yield"
12
+ data_dir = r"C:\Users\ritvik\Downloads\maize_yield\maize_yield"
11
13
 
12
14
  # 2. Use glob to find all .dta files in that directory:
13
15
  dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
@@ -41,7 +43,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
41
43
  merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
42
44
  merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)
43
45
 
44
- dg = gpd.read_file(r"wolayita_dissolved.shp")
46
+ dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Regions\Shps\wolayita_dissolved.shp")
45
47
  dg = dg[['W_CODE', 'W_NAME']]
46
48
 
47
49
  # Merge the two dataframes on W_CODE
@@ -52,10 +54,201 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
52
54
 
53
55
  # Compte yield column
54
56
  merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']
57
+ merged_df.to_csv(r'D:\Users\ritvik\projects\GEOGLAM\Output\crop_condition\March_27_2025\plots\EWCM\kabele.csv', index=False)
58
+ breakpoint()
59
+ # Add a histogram showing distribution of yields, use separate line and color for each 5-year period
60
+ # Create a new column 'Year Group' which groups years into 5-year periods
61
+ import pandas as pd
62
+ import seaborn as sns
63
+ import matplotlib.pyplot as plt
64
+
65
+ # 1. Group years into 5-year periods
66
+ merged_df['Year Group'] = pd.cut(
67
+ merged_df['YEAR'],
68
+ bins=range(2005, 2026, 5),
69
+ right=False
70
+ )
71
+
72
+ # 2. Create a FacetGrid with one facet per Year Group
73
+ g = sns.FacetGrid(
74
+ merged_df,
75
+ col="Year Group",
76
+ col_wrap=2,
77
+ height=4,
78
+ sharex=True,
79
+ sharey=True
80
+ )
81
+
82
+ # 3. Map an ECDF plot to each facet
83
+ # 'skyblue' is used for consistency with your original color choice
84
+ g.map(sns.ecdfplot, 'yield', color='skyblue')
85
+
86
+ # 4. Add vertical/horizontal lines, annotations, etc. in each facet
87
+ for ax, year_group in zip(g.axes.flatten(), g.col_names):
88
+ # -- Subset data for this particular facet
89
+ subset = merged_df[merged_df['Year Group'] == year_group]
90
+
91
+ # -- Vertical line at yield=16
92
+ ax.axvline(x=16, color='red', linestyle='--')
93
+
94
+ # -- Annotate the line at yield=16
95
+ ax.annotate(
96
+ '16 QQ/ha',
97
+ xy=(16, 0.8), # x=16, y=0.8 in data coordinates (cumulative fraction)
98
+ xytext=(5, 0), # offset the text to the right by 5 points
99
+ textcoords='offset points',
100
+ rotation=90,
101
+ color='red',
102
+ ha='center',
103
+ va='center',
104
+ fontsize=9
105
+ )
106
+
107
+ # -- Horizontal lines for quintiles on the y-axis (20%, 40%, 60%, 80%)
108
+ for q in [0.2, 0.4, 0.6, 0.8]:
109
+ ax.axhline(y=q, color='green', linestyle='--')
110
+ # (Optional) label each horizontal line:
111
+ # ax.text(ax.get_xlim()[1]*0.9, q, f"{int(q*100)}%",
112
+ # va='center', ha='right', color='green', fontsize=9)
113
+
114
+ # -- Number of observations in top-right corner
115
+ n_obs = len(subset)
116
+ ax.text(
117
+ 0.95, 0.95,
118
+ f"N = {n_obs}",
119
+ transform=ax.transAxes,
120
+ ha='right',
121
+ va='top',
122
+ fontsize=9,
123
+ color='black'
124
+ )
125
+
126
+ # 5. Common title
127
+ plt.subplots_adjust(top=0.9)
128
+ g.fig.suptitle("Yield Distribution at Kabele Level (ECDF)", fontsize=16)
129
+
130
+ plt.show()
131
+
132
+ # Find the percentage of values below 16 QQ/ha and Year Group 2015-2020 or 2020-2025
133
+ #below_16 = merged_df[(merged_df['yield'] < 16) & (merged_df['YEAR'] >=2015) & (merged_df['YEAR'] < 2025)]
134
+ #breakpoint()
135
+ #below_16_pct = below_16.mean() * 100
136
+ #print(f"Percentage of yields below 16 QQ/ha at Kabele level: {below_16_pct:.2f}%")
137
+
138
+
139
+ # Create a heatmap showing the number of unique FA's per DIST and YEAR combination
140
+ # Group by DIST and YEAR, then count the number of unique FA's
141
+ df_fa_counts = merged_df.groupby(['DIST', 'YEAR'])['FA'].nunique().reset_index(name='FA_Count')
142
+
143
+ # Pivot the data so that rows = DIST, columns = YEAR, values = FA_Count
144
+ df_fa_pivot = df_fa_counts.pivot(index='DIST', columns='YEAR', values='FA_Count')
145
+
146
+ # Change year column to type int
147
+ df_fa_pivot.columns = df_fa_pivot.columns.astype(int)
148
+
149
+ # Create the heatmap
150
+
151
+ plt.figure(figsize=(12, 8))
152
+ sns.heatmap(
153
+ df_fa_pivot,
154
+ cmap='viridis', # color map; try 'coolwarm' or others
155
+ annot=True, # show numeric values in each cell
156
+ fmt="g", # format numbers (2 decimal places)
157
+ linewidths=.5 # line width between cells
158
+ )
159
+ plt.show()
55
160
 
56
161
  # create a new dataframe which computes average yield by W_NAME for each year, do a weighted average using FWEIGHT column
57
162
  df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')
58
163
 
164
+ # Add a histogram showing distribution of yields, use separate line and color for each 5-year period
165
+ # Create a new column 'Year Group' which groups years into 5-year periods
166
+ import pandas as pd
167
+ import seaborn as sns
168
+ import matplotlib.pyplot as plt
169
+
170
+ # 1. Create custom bins and labels so that the last group is 2020-2021
171
+ import pandas as pd
172
+ import seaborn as sns
173
+ import matplotlib.pyplot as plt
174
+
175
+ # 1. Define custom bins so that the last group is labeled 2020–2021
176
+ import pandas as pd
177
+ import seaborn as sns
178
+ import matplotlib.pyplot as plt
179
+
180
+ # 1. Define custom bins so that the last group is labeled 2020–2021
181
+ df_avg_yield['Year Group'] = pd.cut(
182
+ df_avg_yield['YEAR'],
183
+ bins=[2005, 2010, 2015, 2020, 2022], # stops at 2022 so the label reads "2020–2021"
184
+ labels=['2005–2009', '2010–2014', '2015–2019', '2020–2021'],
185
+ right=False
186
+ )
187
+
188
+ # 2. Create a FacetGrid by Year Group
189
+ g = sns.FacetGrid(
190
+ df_avg_yield,
191
+ col='Year Group',
192
+ col_wrap=2,
193
+ height=4,
194
+ sharex=True,
195
+ sharey=True
196
+ )
197
+
198
+ # 3. Map an ECDF plot (instead of histogram) in each facet
199
+ g.map_dataframe(sns.ecdfplot, x='yield')
200
+
201
+ # 4. Add lines and annotations to each facet
202
+ for i, ax in enumerate(g.axes.flatten()):
203
+ # Subset the data for the current Year Group
204
+ year_group = g.col_names[i]
205
+ subset = df_avg_yield[df_avg_yield['Year Group'] == year_group]
206
+
207
+ # -- Vertical line at yield=16
208
+ ax.axvline(x=16, color='red', linestyle='--')
209
+ # Annotate that line near the top of the plot
210
+ ax.annotate(
211
+ "16 QQ/ha",
212
+ xy=(16, 0.8), # (x=16, y=0.8 in data coords for the y-axis)
213
+ xytext=(5, 0),
214
+ textcoords='offset points',
215
+ rotation=90,
216
+ color='red',
217
+ ha='center',
218
+ va='center',
219
+ fontsize=9
220
+ )
221
+
222
+ # -- Horizontal lines at 20%, 40%, 60%, 80% (quintiles in terms of fraction)
223
+ for q in [0.2, 0.4, 0.6, 0.8]:
224
+ ax.axhline(y=q, color='green', linestyle='--')
225
+
226
+ # -- Number of observations in the top-right corner
227
+ n_obs = len(subset)
228
+ ax.text(
229
+ 0.95, 0.95,
230
+ f"N = {n_obs}",
231
+ transform=ax.transAxes,
232
+ ha='right',
233
+ va='top',
234
+ fontsize=9,
235
+ color='black'
236
+ )
237
+
238
+ # 5. Overall title
239
+ plt.subplots_adjust(top=0.9)
240
+ g.fig.suptitle("Yield Distribution at Woreda Level (ECDF)", fontsize=16)
241
+
242
+ plt.show()
243
+
244
+ # Find the percentage of values below 16 QQ/ha
245
+ below_16 = df_avg_yield['yield'] < 16
246
+ below_16 = df_avg_yield[(df_avg_yield['yield'] < 16) & (df_avg_yield['YEAR'] >=2015) & (df_avg_yield['YEAR'] < 2025)]
247
+ breakpoint()
248
+ below_16_pct = below_16.mean() * 100
249
+ print(f"Percentage of yields below 16 QQ/ha: {below_16_pct:.2f}%")
250
+
251
+ breakpoint()
59
252
  # Change W_NAME column to title case
60
253
  df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
61
254
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
25
25
 
26
26
 
27
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
28
+
29
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
30
+
27
31
  **Models to visualize and forecast crop conditions and yields**
28
32
 
29
33
 
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.70",
53
+ version="0.1.72",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes