geocif 0.1.67__tar.gz → 0.1.68__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. {geocif-0.1.67/geocif.egg-info → geocif-0.1.68}/PKG-INFO +1 -1
  2. {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/definitions.py +8 -8
  3. {geocif-0.1.67 → geocif-0.1.68}/geocif/geocif.py +21 -18
  4. {geocif-0.1.67 → geocif-0.1.68}/geocif/geocif_runner.py +34 -35
  5. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/feature_selection.py +15 -1
  6. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/stats.py +1 -1
  7. {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/tmp.py +20 -7
  8. {geocif-0.1.67 → geocif-0.1.68/geocif.egg-info}/PKG-INFO +1 -1
  9. {geocif-0.1.67 → geocif-0.1.68}/setup.py +1 -1
  10. {geocif-0.1.67 → geocif-0.1.68}/LICENSE +0 -0
  11. {geocif-0.1.67 → geocif-0.1.68}/MANIFEST.in +0 -0
  12. {geocif-0.1.67 → geocif-0.1.68}/README.md +0 -0
  13. {geocif-0.1.67 → geocif-0.1.68}/geocif/__init__.py +0 -0
  14. {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/__init__.py +0 -0
  15. {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/geoagmet.py +0 -0
  16. {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/plot.py +0 -0
  17. {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/utils.py +0 -0
  18. {geocif-0.1.67 → geocif-0.1.68}/geocif/analysis.py +0 -0
  19. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/__init__.py +0 -0
  20. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/constants.py +0 -0
  21. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/features.py +0 -0
  22. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/geo.py +0 -0
  23. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/geocif.py +0 -0
  24. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/metadata.py +0 -0
  25. {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/models.py +0 -0
  26. {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/__init__.py +0 -0
  27. {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/indices.py +0 -0
  28. {geocif-0.1.67 → geocif-0.1.68}/geocif/experiments.py +0 -0
  29. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner.py +0 -0
  30. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_angola.py +0 -0
  31. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_madagascar.py +0 -0
  32. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_malawi.py +0 -0
  33. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_mozambique.py +0 -0
  34. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_south_africa.py +0 -0
  35. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_zambia.py +0 -0
  36. {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_zimbabwe.py +0 -0
  37. {geocif-0.1.67 → geocif-0.1.68}/geocif/logger.py +0 -0
  38. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/__init__.py +0 -0
  39. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/correlations.py +0 -0
  40. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/embedding.py +0 -0
  41. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/feature_engineering.py +0 -0
  42. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/outliers.py +0 -0
  43. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/outlook.py +0 -0
  44. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/output.py +0 -0
  45. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/spatial_autocorrelation.py +0 -0
  46. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/stages.py +0 -0
  47. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/trainers.py +0 -0
  48. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/trend.py +0 -0
  49. {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/xai.py +0 -0
  50. {geocif-0.1.67 → geocif-0.1.68}/geocif/mm.py +0 -0
  51. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/__init__.py +0 -0
  52. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/aa.py +0 -0
  53. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/area.py +0 -0
  54. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/automl.py +0 -0
  55. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/download_esi.py +0 -0
  56. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/enso.py +0 -0
  57. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/eval.py +0 -0
  58. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/gamtest.py +0 -0
  59. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/gee_access.py +0 -0
  60. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/misc.py +0 -0
  61. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/play_xagg.py +0 -0
  62. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/reg.py +0 -0
  63. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/sustain.py +0 -0
  64. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/test_catboost.py +0 -0
  65. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp.py +0 -0
  66. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp2.py +0 -0
  67. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp3.py +0 -0
  68. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp4.py +0 -0
  69. {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp5.py +0 -0
  70. {geocif-0.1.67 → geocif-0.1.68}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.1.67 → geocif-0.1.68}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.1.67 → geocif-0.1.68}/geocif/utils.py +0 -0
  73. {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/plot.py +0 -0
  75. {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/SOURCES.txt +0 -0
  76. {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/dependency_links.txt +0 -0
  77. {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/not-zip-safe +0 -0
  78. {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/top_level.txt +0 -0
  79. {geocif-0.1.67 → geocif-0.1.68}/requirements.txt +0 -0
  80. {geocif-0.1.67 → geocif-0.1.68}/setup.cfg +0 -0
  81. {geocif-0.1.67 → geocif-0.1.68}/tests/test_geocif.py +0 -0
geocif-0.1.67/geocif.egg-info/PKG-INFO → geocif-0.1.68/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: geocif
- Version: 0.1.67
+ Version: 0.1.68
  Summary: Models to visualize and forecast crop conditions and yields
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
  Author: Ritvik Sahajpal
geocif/cei/definitions.py
@@ -1,11 +1,11 @@
  PHENOLOGICAL_STAGES = [1, 2, 3]
  dict_indices = {
  "GD4": ["Cold", "Growing degree days (sum of Tmean > 4 C)"],
- #"CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
- #"FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
+ "CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
+ "FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
  "HD17": ["Cold", "Heating degree days (sum of Tmean < 17 C)"],
- #"ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
- #"CSDI": ["Cold", "Cold-spell duration index"],
+ "ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
+ "CSDI": ["Cold", "Cold-spell duration index"],
  "TG10p": ["Cold", "Percentage of days when Tmean < 10th percentile"],
  "TN10p": ["Cold", "Percentage of days when Tmin < 10th percentile"],
  "TXn": ["Cold", "Minimum daily maximum temperature"],
@@ -70,10 +70,10 @@ dict_indices = {
  "Compound",
  "Days with TG > 75th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum",
  ],
- # "SD": ["Snow", "Mean of daily snow depth"],
- # "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
- # "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
- # "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
+ "SD": ["Snow", "Mean of daily snow depth"],
+ "SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
+ "SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
+ "SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
  }

  dict_ndvi = {
geocif/geocif.py
@@ -598,15 +598,15 @@ class Geocif:
  df_region[f"Median {self.target}"].values, 3
  )

- if f"Median {self.target} (2014-2018)" in df_region.columns:
- df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
- df_region[f"Median {self.target} (2014-2018)"].values, 3
- )
-
- if f"Median {self.target} (2013-2017)" in df_region.columns:
- df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
- df_region[f"Median {self.target} (2013-2017)"].values, 3
- )
+ # if f"Median {self.target} (2014-2018)" in df_region.columns:
+ # df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
+ # df_region[f"Median {self.target} (2014-2018)"].values, 3
+ # )
+ #
+ # if f"Median {self.target} (2013-2017)" in df_region.columns:
+ # df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
+ # df_region[f"Median {self.target} (2013-2017)"].values, 3
+ # )

  if self.estimate_ci:
  if self.estimate_ci_for_all or self.forecast_season == self.today_year:
@@ -820,8 +820,8 @@ class Geocif:
  + self.statistics_columns
  + self.feature_names
  + [f"Median {self.target}"]
- + [f"Median {self.target} (2014-2018)"]
- + [f"Median {self.target} (2013-2017)"]
+ #+ [f"Median {self.target} (2014-2018)"]
+ #+ [f"Median {self.target} (2013-2017)"]
  + ["Region_ID"]
  )
  if self.check_yield_trend:
@@ -1011,13 +1011,13 @@ class Geocif:
  df, self.all_seasons_with_yield, self.number_median_years, self.target
  )

- df = fe.compute_user_median_statistics(
- df, [2014, 2015, 2016, 2017, 2018]
- )
-
- df = fe.compute_user_median_statistics(
- df, [2013, 2014, 2015, 2016, 2017]
- )
+ # df = fe.compute_user_median_statistics(
+ # df, [2014, 2015, 2016, 2017, 2018]
+ # )
+ #
+ # df = fe.compute_user_median_statistics(
+ # df, [2013, 2014, 2015, 2016, 2017]
+ # )

  if self.median_area_as_feature:
  df = fe.compute_median_statistics(
@@ -1393,6 +1393,9 @@ class Geocif:
  self.dg["ADM0_NAME"].str.lower().str.replace(" ", "_") == self.country
  ]

+ # Drop any duplicates based on Country Region column
+ self.dg_country = self.dg_country.drop_duplicates(subset=["Country Region"])
+
  def read_data(self, country, crop, season):
  """

geocif/geocif_runner.py
@@ -26,41 +26,40 @@ def loop_execute(inputs):
  Returns:

  """
- # from pycallgraph2 import Config, PyCallGraph, GlobbingFilter
- # from pycallgraph2.output import GraphvizOutput
- #
- # graphviz = GraphvizOutput()
- # graphviz.output_file = "geocif_visualization.png"
- # plt.rcParams["figure.dpi"] = 600
- # config = Config(max_depth=5)
- # config.trace_filter = GlobbingFilter(
- # exclude=[
- # "pycallgraph.*",
- # "torch*",
- # ]
- # )
- #
- # with PyCallGraph(output=graphviz, config=config):
- project_name, country, crop, season, model, logger, parser, index = inputs
-
- logger.info("=====================================================")
- logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
- logger.info("=====================================================")
-
- obj = geocif.Geocif(logger=logger,
- parser=parser,
- project_name=project_name)
- obj.read_data(country, crop, season)
-
- # Store config file in database, only execute this for
- # the first iteration of the loop
- if index == 0:
- output.config_to_db(obj.db_path, obj.parser, obj.today)
-
- # Setup metadata and run ML code
- obj.setup(season, model)
- if obj.simulation_stages:
- obj.execute()
+ from pycallgraph2 import Config, PyCallGraph, GlobbingFilter
+ from pycallgraph2.output import GraphvizOutput
+
+ graphviz = GraphvizOutput()
+ graphviz.output_file = "geocif_visualization.png"
+ plt.rcParams["figure.dpi"] = 600
+ config = Config(max_depth=5)
+ config.trace_filter = GlobbingFilter(
+ exclude=[
+ "pycallgraph.*",
+ ]
+ )
+
+ with PyCallGraph(output=graphviz, config=config):
+ project_name, country, crop, season, model, logger, parser, index = inputs
+
+ logger.info("=====================================================")
+ logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
+ logger.info("=====================================================")
+
+ obj = geocif.Geocif(logger=logger,
+ parser=parser,
+ project_name=project_name)
+ obj.read_data(country, crop, season)
+
+ # Store config file in database, only execute this for
+ # the first iteration of the loop
+ if index == 0:
+ output.config_to_db(obj.db_path, obj.parser, obj.today)
+
+ # Setup metadata and run ML code
+ obj.setup(season, model)
+ if obj.simulation_stages:
+ obj.execute()


  def gather_inputs(parser):
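Note on the hunk above: it re-enables pycallgraph2 call-graph tracing around the body of loop_execute, using the same pattern visible in the added lines (render the traced graph with GraphvizOutput, cap the trace depth, filter out the tracer's own frames, run the work inside a PyCallGraph context). The following is a minimal stand-alone sketch of that pattern, not the package's code: the profiled function is an invented stand-in, and writing the PNG assumes Graphviz is installed.

    from pycallgraph2 import Config, GlobbingFilter, PyCallGraph
    from pycallgraph2.output import GraphvizOutput

    def do_work():
        # Invented stand-in for the Geocif setup/execute calls being traced
        return sum(i * i for i in range(1000))

    graphviz = GraphvizOutput()
    graphviz.output_file = "callgraph_example.png"  # rendered via Graphviz
    config = Config(max_depth=5)                    # keep the graph readable
    config.trace_filter = GlobbingFilter(exclude=["pycallgraph.*"])

    with PyCallGraph(output=graphviz, config=config):
        do_work()
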
geocif/ml/feature_selection.py
@@ -188,10 +188,24 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
  # Get the selected feature names
  selected_features = X.columns[selected_features].tolist()
+ elif method == "lasso":
+ from sklearn.linear_model import LassoLarsCV
+ from sklearn.feature_selection import SelectFromModel
+
+ # Fit Lasso model (L1 regularization) to perform feature selection
+ lasso = LassoLarsCV(cv=5)
+ lasso.fit(X, y)
+
+ # Use SelectFromModel to remove features with zero coefficients
+ selector = SelectFromModel(lasso, prefit=True)
+
+ # Get the selected features
+ selected_features = X.columns[selector.get_support()].tolist()
+ print(selected_features)
  elif method == "BorutaPy":
  from boruta import BorutaPy

- selector = BorutaPy(forest, n_estimators="auto", random_state=42)
+ selector = BorutaPy(forest, n_estimators="auto", random_state=42, verbose=0)
  selector.fit(X.values, y.values)
  selected_features_mask = selector.support_
  selected_features = X.columns[selected_features_mask].tolist()
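For context on the new "lasso" branch above: it uses the standard scikit-learn recipe of fitting a cross-validated LARS-Lasso model and keeping only the columns whose coefficients survive the L1 penalty. A minimal sketch of that recipe on synthetic data (the feature names and values below are invented, not from geocif):

    import numpy as np
    import pandas as pd
    from sklearn.feature_selection import SelectFromModel
    from sklearn.linear_model import LassoLarsCV

    # Synthetic data: three informative columns plus three noise columns
    rng = np.random.default_rng(0)
    X = pd.DataFrame(rng.normal(size=(200, 6)),
                     columns=[f"feat_{i}" for i in range(6)])
    y = (2.0 * X["feat_0"] - 1.5 * X["feat_1"] + 0.5 * X["feat_2"]
         + rng.normal(scale=0.1, size=200))

    # Cross-validated LARS-Lasso chooses the regularization strength itself
    lasso = LassoLarsCV(cv=5).fit(X, y)

    # Keep only the features with non-zero (above-threshold) coefficients
    selector = SelectFromModel(lasso, prefit=True)
    selected = X.columns[selector.get_support()].tolist()
    print(selected)  # typically ['feat_0', 'feat_1', 'feat_2']
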
geocif/ml/stats.py
@@ -203,7 +203,7 @@ def add_statistics(
  fn = "illinois.csv"
  elif country == "Ethiopia":
  # HACK
- fn = "ethiopia_wheat_summary_formatted.csv"
+ fn = "adm_crop_production.csv"
  else:
  fn = "adm_crop_production.csv"
  df_fewsnet = pd.read_csv(dir_stats / fn, low_memory=False)
geocif/viz/tmp.py
@@ -1,6 +1,4 @@
  import geopandas as gpd
- import pandas as pd
- import matplotlib.pyplot as plt
  import palettable as pal
  import matplotlib.colors as mcolors

@@ -9,7 +7,7 @@ import glob
  import os

  # 1. Specify the directory containing your .dta files:
- data_dir = r"C:\Users\ritvik\Downloads\maize_yield\maize_yield"
+ data_dir = r"."

  # 2. Use glob to find all .dta files in that directory:
  dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
@@ -20,6 +18,13 @@ dataframes = [pd.read_stata(f) for f in dta_files]
  # 4. Concatenate them all into one DataFrame (row-wise):
  merged_df = pd.concat(dataframes, ignore_index=True)

+ # Replace null values in PROD98CQ with those in PROD columns
+ merged_df['PROD98CQ'] = merged_df['PROD98CQ'].fillna(merged_df['PROD'])
+ merged_df['YEAR'] = merged_df['YEAR'].fillna(merged_df['year'])
+
+ # Drop rows where AREAH is 0
+ merged_df = merged_df[merged_df['AREAH'] != 0]
+
  merged_df['ZONE'] = merged_df['ZONE'].astype(int)
  merged_df['DIST'] = merged_df['DIST'].astype(int)

@@ -36,7 +41,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
  merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
  merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)

- dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\Input\countries\wolayita\wolayita_dissolved.shp")
+ dg = gpd.read_file(r"wolayita_dissolved.shp")
  dg = dg[['W_CODE', 'W_NAME']]

  # Merge the two dataframes on W_CODE
@@ -48,8 +53,8 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
  # Compte yield column
  merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']

- # create a new dataframe which computes average yield by W_NAME for each year
- df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR'])['yield'].mean().reset_index()
+ # create a new dataframe which computes average yield by W_NAME for each year, do a weighted average using FWEIGHT column
+ df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')

  # Change W_NAME column to title case
  df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
@@ -64,7 +69,15 @@ df_avg_yield = df_avg_yield.pivot(index='W_NAME', columns='YEAR', values='yield'
  df_avg_yield.index.name = None
  df_avg_yield.columns.name = None

- df_avg_yield.to_csv('wolayita_yields.csv')
+ df_avg_yield.to_csv('wolayita_yields_v8.csv')
+ breakpoint()
+ # Compare wolayita_yields_v2.csv with wolayita_yields.csv
+ # 1. Load the two CSV files
+ df_v1 = pd.read_csv('wolayita_yields.csv')
+ df_v2 = pd.read_csv('wolayita_yields_v2.csv')
+
+ # 2. Check if the two DataFrames are equal
+ print(df_v1.equals(df_v2))

  breakpoint()
  # 5. (Optional) Inspect the merged DataFrame
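The last two hunks above switch the Wolayita aggregation from a plain mean to an FWEIGHT-weighted mean per W_NAME and YEAR. A small self-contained sketch of that groupby pattern (column names as in the script; the rows, woreda labels, and weights are invented):

    import numpy as np
    import pandas as pd

    # Invented survey rows: one yield observation per household,
    # FWEIGHT acting as the survey expansion weight
    df = pd.DataFrame({
        "W_NAME":  ["Damot Gale", "Damot Gale", "Sodo Zuria", "Sodo Zuria"],
        "YEAR":    [2019, 2019, 2019, 2019],
        "yield":   [1.2, 2.0, 0.8, 1.6],
        "FWEIGHT": [10.0, 30.0, 20.0, 20.0],
    })

    # Weighted average yield per woreda and year, as in the updated viz/tmp.py line
    df_avg = (
        df.groupby(["W_NAME", "YEAR"])
          .apply(lambda x: np.average(x["yield"], weights=x["FWEIGHT"]))
          .reset_index(name="yield")
    )
    print(df_avg)
    # Damot Gale 2019 -> (1.2*10 + 2.0*30) / 40 = 1.8
    # Sodo Zuria 2019 -> (0.8*20 + 1.6*20) / 40 = 1.2
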
geocif-0.1.67/PKG-INFO → geocif-0.1.68/geocif.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: geocif
- Version: 0.1.67
+ Version: 0.1.68
  Summary: Models to visualize and forecast crop conditions and yields
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
  Author: Ritvik Sahajpal
setup.py
@@ -50,6 +50,6 @@ setup(
  test_suite="tests",
  tests_require=test_requirements,
  url="https://ritviksahajpal.github.io/yield_forecasting/",
- version="0.1.67",
+ version="0.1.68",
  zip_safe=False,
  )