geocif 0.1.70__tar.gz → 0.1.72__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.70/geocif.egg-info → geocif-0.1.72}/PKG-INFO +5 -1
  2. {geocif-0.1.70 → geocif-0.1.72}/README.md +4 -0
  3. {geocif-0.1.70 → geocif-0.1.72}/geocif/analysis.py +10 -8
  4. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_angola.py +1 -1
  5. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_madagascar.py +1 -1
  6. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_malawi.py +1 -1
  7. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_mozambique.py +1 -1
  8. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_south_africa.py +1 -1
  9. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_zambia.py +1 -1
  10. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner_zimbabwe.py +1 -1
  11. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/aa.py +86 -0
  12. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/tmp.py +196 -3
  13. {geocif-0.1.70 → geocif-0.1.72/geocif.egg-info}/PKG-INFO +5 -1
  14. {geocif-0.1.70 → geocif-0.1.72}/setup.py +1 -1
  15. {geocif-0.1.70 → geocif-0.1.72}/LICENSE +0 -0
  16. {geocif-0.1.70 → geocif-0.1.72}/MANIFEST.in +0 -0
  17. {geocif-0.1.70 → geocif-0.1.72}/geocif/__init__.py +0 -0
  18. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/__init__.py +0 -0
  19. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/geoagmet.py +0 -0
  20. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/plot.py +0 -0
  21. {geocif-0.1.70 → geocif-0.1.72}/geocif/agmet/utils.py +0 -0
  22. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/__init__.py +0 -0
  23. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/constants.py +0 -0
  24. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/features.py +0 -0
  25. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/geo.py +0 -0
  26. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/geocif.py +0 -0
  27. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/metadata.py +0 -0
  28. {geocif-0.1.70 → geocif-0.1.72}/geocif/backup/models.py +0 -0
  29. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/__init__.py +0 -0
  30. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/definitions.py +0 -0
  31. {geocif-0.1.70 → geocif-0.1.72}/geocif/cei/indices.py +0 -0
  32. {geocif-0.1.70 → geocif-0.1.72}/geocif/experiments.py +0 -0
  33. {geocif-0.1.70 → geocif-0.1.72}/geocif/geocif.py +0 -0
  34. {geocif-0.1.70 → geocif-0.1.72}/geocif/geocif_runner.py +0 -0
  35. {geocif-0.1.70 → geocif-0.1.72}/geocif/indices_runner.py +0 -0
  36. {geocif-0.1.70 → geocif-0.1.72}/geocif/logger.py +0 -0
  37. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/embedding.py +0 -0
  40. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/feature_engineering.py +0 -0
  41. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/feature_selection.py +0 -0
  42. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/outliers.py +0 -0
  43. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/outlook.py +0 -0
  44. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/output.py +0 -0
  45. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/spatial_autocorrelation.py +0 -0
  46. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/stages.py +0 -0
  47. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/stats.py +0 -0
  48. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/trainers.py +0 -0
  49. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/trend.py +0 -0
  50. {geocif-0.1.70 → geocif-0.1.72}/geocif/ml/xai.py +0 -0
  51. {geocif-0.1.70 → geocif-0.1.72}/geocif/mm.py +0 -0
  52. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/__init__.py +0 -0
  53. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/area.py +0 -0
  54. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/automl.py +0 -0
  55. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/download_esi.py +0 -0
  56. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/enso.py +0 -0
  57. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/eval.py +0 -0
  58. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/gamtest.py +0 -0
  59. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/gee_access.py +0 -0
  60. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/misc.py +0 -0
  61. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/play_xagg.py +0 -0
  62. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/reg.py +0 -0
  63. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.70 → geocif-0.1.72}/geocif/playground/wolayita_maize_mask.py +0 -0
  71. {geocif-0.1.70 → geocif-0.1.72}/geocif/risk/__init__.py +0 -0
  72. {geocif-0.1.70 → geocif-0.1.72}/geocif/risk/impact_assessment.py +0 -0
  73. {geocif-0.1.70 → geocif-0.1.72}/geocif/utils.py +0 -0
  74. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/__init__.py +0 -0
  75. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/gt.py +0 -0
  76. {geocif-0.1.70 → geocif-0.1.72}/geocif/viz/plot.py +0 -0
  77. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.70 → geocif-0.1.72}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.70 → geocif-0.1.72}/requirements.txt +0 -0
  82. {geocif-0.1.70 → geocif-0.1.72}/setup.cfg +0 -0
  83. {geocif-0.1.70 → geocif-0.1.72}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
25
25
 
26
26
 
27
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
28
+
29
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
30
+
27
31
  **Models to visualize and forecast crop conditions and yields**
28
32
 
29
33
 
@@ -5,6 +5,10 @@
5
5
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
6
6
 
7
7
 
8
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
9
+
10
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
11
+
8
12
  **Models to visualize and forecast crop conditions and yields**
9
13
 
10
14
 
@@ -560,12 +560,12 @@ class Geoanalysis:
560
560
  )
561
561
 
562
562
  # --- For computing median yields ---
563
- # Compute median yield for 2014 - 2018
564
- df_median_2014_2018 = (
565
- df_all[df_all["Harvest Year"].between(2014, 2018)]
563
+ # Compute median yield for 2018 - 2022
564
+ df_median_2018_2022 = (
565
+ df_all[df_all["Harvest Year"].between(2018, 2022)]
566
566
  .groupby("Region")["Yield (tn per ha)"]
567
- .median()
568
- .rename(f"Median Yield (tn per ha) (2014-2018)")
567
+ .mean()
568
+ .rename(f"Median Yield (tn per ha) (2018-2022)")
569
569
  .reset_index()
570
570
  )
571
571
 
@@ -573,7 +573,7 @@ class Geoanalysis:
573
573
  df_median_2013_2017 = (
574
574
  df_all[df_all["Harvest Year"].between(2013, 2017)]
575
575
  .groupby("Region")["Yield (tn per ha)"]
576
- .median()
576
+ .mean()
577
577
  .rename("Median Yield (tn per ha) (2013-2017)")
578
578
  .reset_index()
579
579
  )
@@ -581,7 +581,7 @@ class Geoanalysis:
581
581
  # Merge the median yield columns with the % of total production dataframe
582
582
  df_historic = (
583
583
  df_pct
584
- .merge(df_median_2014_2018, on="Region", how="left")
584
+ .merge(df_median_2018_2022, on="Region", how="left")
585
585
  .merge(df_median_2013_2017, on="Region", how="left")
586
586
  )
587
587
 
@@ -885,7 +885,9 @@ class Geoanalysis:
885
885
  # Get the ML section
886
886
  df_ml = self.df_config[self.df_config["Section"] == "ML"]
887
887
 
888
- self.countries = ["malawi"]
888
+ self.countries = ast.literal_eval(
889
+ df_ml[df_ml["Option"] == "countries"]["Value"].values[0]
890
+ )
889
891
  for country in self.countries:
890
892
  df = self.df_config[self.df_config["Section"] == country]
891
893
 
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.9)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.8)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -174,7 +174,7 @@ class cei_runner(base.BaseGeo):
174
174
  combinations = [i for i in combinations if f"{country}_maize_s1" in i[3]]
175
175
 
176
176
  if True:
177
- num_cpu = int(cpu_count() * 0.1)
177
+ num_cpu = int(cpu_count() * 0.2)
178
178
  with Pool(num_cpu) as p:
179
179
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
180
180
  pass
@@ -1,3 +1,89 @@
1
+ import geopandas as gpd
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib as mpl
5
+
6
+ # --- 1. Read data ---
7
+ dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica.shp")
8
+ df = pd.read_csv(r"D:\Users\ritvik\projects\GEOGLAM\geocif_march_2025.csv")
9
+
10
+ # --- 2. Create the new "Country Region" column ---
11
+ dg['Country Region'] = (
12
+ dg.apply(
13
+ lambda row: (
14
+ f"{row['ADMIN0']} {row['ADMIN2']}"
15
+ if pd.notnull(row['ADMIN2'])
16
+ else f"{row['ADMIN0']} {row['ADMIN1']}"
17
+ ),
18
+ axis=1
19
+ )
20
+ .str.lower()
21
+ .str.replace(' ', '_')
22
+ )
23
+
24
+ # --- 3. Merge shapefile with CSV ---
25
+ merged = dg.merge(df, left_on='Country Region', right_on='Country Region', how='right')
26
+
27
+ # --- 4. Rename columns ---
28
+ merged.rename(
29
+ columns={
30
+ '% Anomaly (2013-2017)': '2013_2017',
31
+ '% Anomaly (2018-2022)': '2018_2022'
32
+ },
33
+ inplace=True
34
+ )
35
+
36
+ # Optional: Write out merged shapefile
37
+ merged.to_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica_geocif_march_2025.shp")
38
+
39
+ # --- 5. Plot ---
40
+ fig, ax = plt.subplots(1, 2, figsize=(20, 10))
41
+
42
+ # Reduce horizontal space between subplots
43
+ plt.subplots_adjust(wspace=0.05)
44
+
45
+ # Shared color normalization
46
+ norm = mpl.colors.Normalize(vmin=-40, vmax=40)
47
+
48
+ # Plot the anomaly maps (no country boundaries)
49
+ merged.plot(
50
+ column='2013_2017',
51
+ cmap='BrBG',
52
+ norm=norm,
53
+ ax=ax[0],
54
+ legend=False
55
+ )
56
+ ax[0].set_title('Maize Yield Forecast % Anomaly (2013-2017)')
57
+ ax[0].axis('off')
58
+
59
+ merged.plot(
60
+ column='2018_2022',
61
+ cmap='BrBG',
62
+ norm=norm,
63
+ ax=ax[1],
64
+ legend=False
65
+ )
66
+ ax[1].set_title('Maize Yield Forecast % Anomaly (2018-2022)')
67
+ ax[1].axis('off')
68
+
69
+ # Create a single horizontal colorbar
70
+ sm = mpl.cm.ScalarMappable(norm=norm, cmap='BrBG')
71
+ sm.set_array([])
72
+ cbar = fig.colorbar(
73
+ sm,
74
+ ax=ax.ravel().tolist(),
75
+ orientation='horizontal',
76
+ fraction=0.05,
77
+ pad=0.05,
78
+ extend='both'
79
+ )
80
+ cbar.set_label('% Anomaly')
81
+
82
+ plt.savefig(r"D:\Users\ritvik\projects\GEOGLAM\maize_yield_forecast_anomaly.png", dpi=300)
83
+
84
+
85
+
86
+ breakpoint()
1
87
  from great_tables import GT, html
2
88
  import pandas as pd
3
89
 
@@ -1,13 +1,15 @@
1
1
  import geopandas as gpd
2
2
  import palettable as pal
3
3
  import matplotlib.colors as mcolors
4
-
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import numpy as np
5
7
  import pandas as pd
6
8
  import glob
7
9
  import os
8
10
 
9
11
  # 1. Specify the directory containing your .dta files:
10
- data_dir = r"C:\Users\ritvik\Downloads\maize_yield (2)\maize_yield"
12
+ data_dir = r"C:\Users\ritvik\Downloads\maize_yield\maize_yield"
11
13
 
12
14
  # 2. Use glob to find all .dta files in that directory:
13
15
  dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
@@ -41,7 +43,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
41
43
  merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
42
44
  merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)
43
45
 
44
- dg = gpd.read_file(r"wolayita_dissolved.shp")
46
+ dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\Input\Global_Datasets\Regions\Shps\wolayita_dissolved.shp")
45
47
  dg = dg[['W_CODE', 'W_NAME']]
46
48
 
47
49
  # Merge the two dataframes on W_CODE
@@ -52,10 +54,201 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
52
54
 
53
55
  # Compte yield column
54
56
  merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']
57
+ merged_df.to_csv(r'D:\Users\ritvik\projects\GEOGLAM\Output\crop_condition\March_27_2025\plots\EWCM\kabele.csv', index=False)
58
+ breakpoint()
59
+ # Add a histogram showing distribution of yields, use separate line and color for each 5-year period
60
+ # Create a new column 'Year Group' which groups years into 5-year periods
61
+ import pandas as pd
62
+ import seaborn as sns
63
+ import matplotlib.pyplot as plt
64
+
65
+ # 1. Group years into 5-year periods
66
+ merged_df['Year Group'] = pd.cut(
67
+ merged_df['YEAR'],
68
+ bins=range(2005, 2026, 5),
69
+ right=False
70
+ )
71
+
72
+ # 2. Create a FacetGrid with one facet per Year Group
73
+ g = sns.FacetGrid(
74
+ merged_df,
75
+ col="Year Group",
76
+ col_wrap=2,
77
+ height=4,
78
+ sharex=True,
79
+ sharey=True
80
+ )
81
+
82
+ # 3. Map an ECDF plot to each facet
83
+ # 'skyblue' is used for consistency with your original color choice
84
+ g.map(sns.ecdfplot, 'yield', color='skyblue')
85
+
86
+ # 4. Add vertical/horizontal lines, annotations, etc. in each facet
87
+ for ax, year_group in zip(g.axes.flatten(), g.col_names):
88
+ # -- Subset data for this particular facet
89
+ subset = merged_df[merged_df['Year Group'] == year_group]
90
+
91
+ # -- Vertical line at yield=16
92
+ ax.axvline(x=16, color='red', linestyle='--')
93
+
94
+ # -- Annotate the line at yield=16
95
+ ax.annotate(
96
+ '16 QQ/ha',
97
+ xy=(16, 0.8), # x=16, y=0.8 in data coordinates (cumulative fraction)
98
+ xytext=(5, 0), # offset the text to the right by 5 points
99
+ textcoords='offset points',
100
+ rotation=90,
101
+ color='red',
102
+ ha='center',
103
+ va='center',
104
+ fontsize=9
105
+ )
106
+
107
+ # -- Horizontal lines for quintiles on the y-axis (20%, 40%, 60%, 80%)
108
+ for q in [0.2, 0.4, 0.6, 0.8]:
109
+ ax.axhline(y=q, color='green', linestyle='--')
110
+ # (Optional) label each horizontal line:
111
+ # ax.text(ax.get_xlim()[1]*0.9, q, f"{int(q*100)}%",
112
+ # va='center', ha='right', color='green', fontsize=9)
113
+
114
+ # -- Number of observations in top-right corner
115
+ n_obs = len(subset)
116
+ ax.text(
117
+ 0.95, 0.95,
118
+ f"N = {n_obs}",
119
+ transform=ax.transAxes,
120
+ ha='right',
121
+ va='top',
122
+ fontsize=9,
123
+ color='black'
124
+ )
125
+
126
+ # 5. Common title
127
+ plt.subplots_adjust(top=0.9)
128
+ g.fig.suptitle("Yield Distribution at Kabele Level (ECDF)", fontsize=16)
129
+
130
+ plt.show()
131
+
132
+ # Find the percentage of values below 16 QQ/ha and Year Group 2015-2020 or 2020-2025
133
+ #below_16 = merged_df[(merged_df['yield'] < 16) & (merged_df['YEAR'] >=2015) & (merged_df['YEAR'] < 2025)]
134
+ #breakpoint()
135
+ #below_16_pct = below_16.mean() * 100
136
+ #print(f"Percentage of yields below 16 QQ/ha at Kabele level: {below_16_pct:.2f}%")
137
+
138
+
139
+ # Create a heatmap showing the number of unique FA's per DIST and YEAR combination
140
+ # Group by DIST and YEAR, then count the number of unique FA's
141
+ df_fa_counts = merged_df.groupby(['DIST', 'YEAR'])['FA'].nunique().reset_index(name='FA_Count')
142
+
143
+ # Pivot the data so that rows = DIST, columns = YEAR, values = FA_Count
144
+ df_fa_pivot = df_fa_counts.pivot(index='DIST', columns='YEAR', values='FA_Count')
145
+
146
+ # Change year column to type int
147
+ df_fa_pivot.columns = df_fa_pivot.columns.astype(int)
148
+
149
+ # Create the heatmap
150
+
151
+ plt.figure(figsize=(12, 8))
152
+ sns.heatmap(
153
+ df_fa_pivot,
154
+ cmap='viridis', # color map; try 'coolwarm' or others
155
+ annot=True, # show numeric values in each cell
156
+ fmt="g", # format numbers (2 decimal places)
157
+ linewidths=.5 # line width between cells
158
+ )
159
+ plt.show()
55
160
 
56
161
  # create a new dataframe which computes average yield by W_NAME for each year, do a weighted average using FWEIGHT column
57
162
  df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')
58
163
 
164
+ # Add a histogram showing distribution of yields, use separate line and color for each 5-year period
165
+ # Create a new column 'Year Group' which groups years into 5-year periods
166
+ import pandas as pd
167
+ import seaborn as sns
168
+ import matplotlib.pyplot as plt
169
+
170
+ # 1. Create custom bins and labels so that the last group is 2020-2021
171
+ import pandas as pd
172
+ import seaborn as sns
173
+ import matplotlib.pyplot as plt
174
+
175
+ # 1. Define custom bins so that the last group is labeled 2020–2021
176
+ import pandas as pd
177
+ import seaborn as sns
178
+ import matplotlib.pyplot as plt
179
+
180
+ # 1. Define custom bins so that the last group is labeled 2020–2021
181
+ df_avg_yield['Year Group'] = pd.cut(
182
+ df_avg_yield['YEAR'],
183
+ bins=[2005, 2010, 2015, 2020, 2022], # stops at 2022 so the label reads "2020–2021"
184
+ labels=['2005–2009', '2010–2014', '2015–2019', '2020–2021'],
185
+ right=False
186
+ )
187
+
188
+ # 2. Create a FacetGrid by Year Group
189
+ g = sns.FacetGrid(
190
+ df_avg_yield,
191
+ col='Year Group',
192
+ col_wrap=2,
193
+ height=4,
194
+ sharex=True,
195
+ sharey=True
196
+ )
197
+
198
+ # 3. Map an ECDF plot (instead of histogram) in each facet
199
+ g.map_dataframe(sns.ecdfplot, x='yield')
200
+
201
+ # 4. Add lines and annotations to each facet
202
+ for i, ax in enumerate(g.axes.flatten()):
203
+ # Subset the data for the current Year Group
204
+ year_group = g.col_names[i]
205
+ subset = df_avg_yield[df_avg_yield['Year Group'] == year_group]
206
+
207
+ # -- Vertical line at yield=16
208
+ ax.axvline(x=16, color='red', linestyle='--')
209
+ # Annotate that line near the top of the plot
210
+ ax.annotate(
211
+ "16 QQ/ha",
212
+ xy=(16, 0.8), # (x=16, y=0.8 in data coords for the y-axis)
213
+ xytext=(5, 0),
214
+ textcoords='offset points',
215
+ rotation=90,
216
+ color='red',
217
+ ha='center',
218
+ va='center',
219
+ fontsize=9
220
+ )
221
+
222
+ # -- Horizontal lines at 20%, 40%, 60%, 80% (quintiles in terms of fraction)
223
+ for q in [0.2, 0.4, 0.6, 0.8]:
224
+ ax.axhline(y=q, color='green', linestyle='--')
225
+
226
+ # -- Number of observations in the top-right corner
227
+ n_obs = len(subset)
228
+ ax.text(
229
+ 0.95, 0.95,
230
+ f"N = {n_obs}",
231
+ transform=ax.transAxes,
232
+ ha='right',
233
+ va='top',
234
+ fontsize=9,
235
+ color='black'
236
+ )
237
+
238
+ # 5. Overall title
239
+ plt.subplots_adjust(top=0.9)
240
+ g.fig.suptitle("Yield Distribution at Woreda Level (ECDF)", fontsize=16)
241
+
242
+ plt.show()
243
+
244
+ # Find the percentage of values below 16 QQ/ha
245
+ below_16 = df_avg_yield['yield'] < 16
246
+ below_16 = df_avg_yield[(df_avg_yield['yield'] < 16) & (df_avg_yield['YEAR'] >=2015) & (df_avg_yield['YEAR'] < 2025)]
247
+ breakpoint()
248
+ below_16_pct = below_16.mean() * 100
249
+ print(f"Percentage of yields below 16 QQ/ha: {below_16_pct:.2f}%")
250
+
251
+ breakpoint()
59
252
  # Change W_NAME column to title case
60
253
  df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
61
254
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.70
3
+ Version: 0.1.72
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -24,6 +24,10 @@ License-File: LICENSE
24
24
  [![image](https://img.shields.io/conda/vn/conda-forge/geocif.svg)](https://anaconda.org/conda-forge/geocif)
25
25
 
26
26
 
27
+ **Generate Climatic Impact-Drivers (CIDs) from Earth Observation (EO) data**
28
+
29
+ [Climatic Impact-Drivers for Crop Yield Assessment at NASA Harvest](https://www.loom.com/share/5c2dc62356c6406193cd9d9725c2a6a9)
30
+
27
31
  **Models to visualize and forecast crop conditions and yields**
28
32
 
29
33
 
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.70",
53
+ version="0.1.72",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes