geocif 0.1.59__tar.gz → 0.1.61__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.59/geocif.egg-info → geocif-0.1.61}/PKG-INFO +1 -1
- {geocif-0.1.59 → geocif-0.1.61}/geocif/analysis.py +0 -5
- {geocif-0.1.59 → geocif-0.1.61}/geocif/geocif.py +20 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/feature_engineering.py +43 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/feature_selection.py +0 -1
- {geocif-0.1.59 → geocif-0.1.61/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.59 → geocif-0.1.61}/setup.py +1 -1
- {geocif-0.1.59 → geocif-0.1.61}/LICENSE +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/MANIFEST.in +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/README.md +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/constants.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/features.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/geo.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/backup/models.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/cei/indices.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/experiments.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_angola.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_madagascar.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_malawi.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_mozambique.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_south_africa.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_zambia.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/indices_runner_zimbabwe.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/logger.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/output.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/stages.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/stats.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/trend.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/ml/xai.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/mm.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/aa.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/automl.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/enso.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/eval.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/gee_access.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/misc.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/play_xagg.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/reg.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/utils.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif/viz/plot.py +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/requirements.txt +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/setup.cfg +0 -0
- {geocif-0.1.59 → geocif-0.1.61}/tests/test_geocif.py +0 -0
@@ -186,11 +186,6 @@ class Geoanalysis:
|
|
186
186
|
return df_metrics, df_regional_metrics, df_national_yield
|
187
187
|
|
188
188
|
def _clean_data(self):
|
189
|
-
# Hack exclude 2012 if country == "illinois"
|
190
|
-
if self.country == "illinois":
|
191
|
-
self.df_analysis = self.df_analysis[
|
192
|
-
self.df_analysis["Harvest Year"] != 2012
|
193
|
-
]
|
194
189
|
# Remove rows with missing values in Observed Yield (tn per ha)
|
195
190
|
return self.df_analysis.dropna(subset=["Observed Yield (tn per ha)"])
|
196
191
|
|
@@ -587,6 +587,16 @@ class Geocif:
|
|
587
587
|
df_region[f"Median {self.target}"].values, 3
|
588
588
|
)
|
589
589
|
|
590
|
+
if f"Median {self.target} (2014-2018)" in df_region.columns:
|
591
|
+
df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
|
592
|
+
df_region[f"Median {self.target} (2014-2018)"].values, 3
|
593
|
+
)
|
594
|
+
|
595
|
+
if f"Median {self.target} (2013-2017)" in df_region.columns:
|
596
|
+
df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
|
597
|
+
df_region[f"Median {self.target} (2013-2017)"].values, 3
|
598
|
+
)
|
599
|
+
|
590
600
|
if self.estimate_ci:
|
591
601
|
if self.estimate_ci_for_all or self.forecast_season == self.today_year:
|
592
602
|
# Iterate over each element in y_pred_ci
|
@@ -730,6 +740,8 @@ class Geocif:
|
|
730
740
|
|
731
741
|
if self.median_yield_as_feature:
|
732
742
|
self.feature_names.append(f"Median {self.target}")
|
743
|
+
self.feature_names.append(f"Median {self.target} (2014-2018)")
|
744
|
+
self.feature_names.append(f"Median {self.target} (2013-2017)")
|
733
745
|
|
734
746
|
if self.lag_yield_as_feature:
|
735
747
|
# For the number of years specified in self.number_lag_years
|
@@ -988,6 +1000,14 @@ class Geocif:
|
|
988
1000
|
df, self.all_seasons_with_yield, self.number_median_years, self.target
|
989
1001
|
)
|
990
1002
|
|
1003
|
+
df = fe.compute_user_median_statistics(
|
1004
|
+
df, [2014, 2015, 2016, 2017, 2018]
|
1005
|
+
)
|
1006
|
+
|
1007
|
+
df = fe.compute_user_median_statistics(
|
1008
|
+
df, [2013, 2014, 2015, 2016, 2017]
|
1009
|
+
)
|
1010
|
+
|
991
1011
|
if self.median_area_as_feature:
|
992
1012
|
df = fe.compute_median_statistics(
|
993
1013
|
df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
|
@@ -105,6 +105,49 @@ def compute_median_statistics(
|
|
105
105
|
return df
|
106
106
|
|
107
107
|
|
108
|
+
def compute_user_median_statistics(df, user_years, target_col="Yield (tn per ha)"):
|
109
|
+
"""
|
110
|
+
Enhances the DataFrame with a new column that contains the median yield computed
|
111
|
+
using only the yields from the user-specified list of years.
|
112
|
+
|
113
|
+
Args:
|
114
|
+
df (DataFrame): The original DataFrame containing yield data.
|
115
|
+
user_years (array-like): List of years to consider for computing the median yield.
|
116
|
+
target_col (str): The column name from which to compute the median yield.
|
117
|
+
|
118
|
+
Returns:
|
119
|
+
DataFrame: The original DataFrame enhanced with a new column for median yield.
|
120
|
+
"""
|
121
|
+
# Ensure 'Harvest Year' is treated as integer for accurate comparisons.
|
122
|
+
df["Harvest Year"] = df["Harvest Year"].astype(int)
|
123
|
+
|
124
|
+
# Sort the user_years list to reliably extract the earliest and latest years.
|
125
|
+
user_years_sorted = sorted(user_years)
|
126
|
+
first_year = user_years_sorted[0]
|
127
|
+
last_year = user_years_sorted[-1]
|
128
|
+
|
129
|
+
# Define the new column name to include the range of years.
|
130
|
+
new_col_name = f"Median {target_col} ({first_year}-{last_year})"
|
131
|
+
|
132
|
+
# Initialize the new column with NaN values.
|
133
|
+
df[new_col_name] = np.nan
|
134
|
+
|
135
|
+
# Group by region and compute the median yield for the specified years.
|
136
|
+
for region, group in tqdm(df.groupby("Region"), desc="Median yield", leave=False):
|
137
|
+
# Skip if the target column is completely null for this region.
|
138
|
+
if group[target_col].isnull().all():
|
139
|
+
continue
|
140
|
+
|
141
|
+
# Filter the rows to only include harvest years that are in the user provided list.
|
142
|
+
mask = group["Harvest Year"].isin(user_years)
|
143
|
+
median_yield = group.loc[mask, target_col].median()
|
144
|
+
|
145
|
+
# Assign the computed median yield to all rows in the current region.
|
146
|
+
df.loc[df["Region"] == region, new_col_name] = median_yield
|
147
|
+
|
148
|
+
return df
|
149
|
+
|
150
|
+
|
108
151
|
def compute_lag_yield(
|
109
152
|
df, all_seasons_with_yield, number_lag_years, target_col="Yield (tn per ha)"
|
110
153
|
):
|
@@ -192,7 +192,6 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
|
|
192
192
|
from boruta import BorutaPy
|
193
193
|
|
194
194
|
selector = BorutaPy(forest, n_estimators="auto", random_state=42)
|
195
|
-
breakpoint()
|
196
195
|
selector.fit(X.values, y.values)
|
197
196
|
selected_features_mask = selector.support_
|
198
197
|
selected_features = X.columns[selected_features_mask].tolist()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|