geocif 0.1.30__tar.gz → 0.1.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.30/geocif.egg-info → geocif-0.1.31}/PKG-INFO +1 -1
- {geocif-0.1.30 → geocif-0.1.31}/geocif/geocif.py +7 -4
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/spatial_autocorrelation.py +46 -27
- {geocif-0.1.30 → geocif-0.1.31/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.30 → geocif-0.1.31}/setup.py +1 -1
- {geocif-0.1.30 → geocif-0.1.31}/LICENSE +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/MANIFEST.in +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/README.md +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/analysis.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/constants.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/features.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/geo.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/backup/models.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/cei/indices.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/indices_runner.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/logger.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/output.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/stages.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/stats.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/trend.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/ml/xai.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/automl.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/playground/misc.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/utils.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif/viz/plot.py +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/requirements.txt +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/setup.cfg +0 -0
- {geocif-0.1.30 → geocif-0.1.31}/tests/test_geocif.py +0 -0
@@ -355,6 +355,8 @@ class Geocif:
|
|
355
355
|
experiment_id = f"{self.country}_{self.crop}"
|
356
356
|
now = ar.utcnow().to("America/New_York").format("MMMM-DD-YYYY HH:mm:ss")
|
357
357
|
selected_features = self.selected_features + self.cat_features
|
358
|
+
# Compute percentage difference between y_pred and y_test
|
359
|
+
ape = np.abs((y_pred - y_test) / y_test) * 100
|
358
360
|
df = pd.DataFrame(
|
359
361
|
{
|
360
362
|
"Experiment_ID": np.full(shp, experiment_id),
|
@@ -378,6 +380,7 @@ class Geocif:
|
|
378
380
|
"Area (ha)": df_region["Area (ha)"].values,
|
379
381
|
f"Observed {self.target}": np.around(y_test, 3).ravel(),
|
380
382
|
f"Predicted {self.target}": np.around(y_pred, 3).ravel(),
|
383
|
+
f"APE": np.around(ape, 3).ravel(),
|
381
384
|
}
|
382
385
|
)
|
383
386
|
|
@@ -720,7 +723,7 @@ class Geocif:
|
|
720
723
|
""" Convert this dataframe into an ML ready format and save to disk """
|
721
724
|
df = self.create_ml_dataframe(df)
|
722
725
|
dir_output = (
|
723
|
-
self.dir_analysis / self.country / self.crop / str(self.forecast_season)
|
726
|
+
self.dir_analysis / self.country / self.crop / self.model_name / str(self.forecast_season)
|
724
727
|
)
|
725
728
|
os.makedirs(dir_output, exist_ok=True)
|
726
729
|
df.to_csv(
|
@@ -768,6 +771,9 @@ class Geocif:
|
|
768
771
|
dict_kwargs["dg_country"] = self.dg_country
|
769
772
|
dict_kwargs["combined_dict"] = self.combined_dict
|
770
773
|
|
774
|
+
if self.spatial_autocorrelation:
|
775
|
+
sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
|
776
|
+
|
771
777
|
if self.correlation_plots:
|
772
778
|
self.logger.info(f"Correlation plot for {self.country} {self.crop}")
|
773
779
|
(
|
@@ -775,9 +781,6 @@ class Geocif:
|
|
775
781
|
dict_best_cei,
|
776
782
|
) = correlations.all_correlated_feature_by_time(df, **dict_kwargs)
|
777
783
|
|
778
|
-
if self.spatial_autocorrelation:
|
779
|
-
sa.compute_spatial_autocorrelation(self.df_results, **dict_kwargs)
|
780
|
-
|
781
784
|
""" Separate into train and test datasets based on forecast_season """
|
782
785
|
mask = df["Harvest Year"] == self.forecast_season
|
783
786
|
self.df_train = df[~mask]
|
@@ -1,7 +1,11 @@
|
|
1
|
+
import warnings
|
2
|
+
|
3
|
+
from tqdm import tqdm
|
4
|
+
import matplotlib.pyplot as plt
|
1
5
|
import pandas as pd
|
2
6
|
from pysal.lib import weights
|
3
|
-
|
4
|
-
|
7
|
+
|
8
|
+
warnings.filterwarnings("ignore")
|
5
9
|
|
6
10
|
|
7
11
|
def validate_inputs(df_results, required_columns):
|
@@ -40,24 +44,26 @@ def preprocess_data(df_results, dg_country):
|
|
40
44
|
dg_country["Country Region"] = dg_country["Country Region"].str.lower()
|
41
45
|
dg_country = dg_country[dg_country["Country Region"].isin(df["Country Region"])]
|
42
46
|
|
47
|
+
dg_country.reset_index(drop=True, inplace=True)
|
48
|
+
|
43
49
|
merged_df = dg_country.merge(df, on="Country Region", how="inner")
|
44
50
|
|
45
|
-
return merged_df
|
51
|
+
return merged_df
|
46
52
|
|
47
53
|
|
48
|
-
def create_base_weights(
|
54
|
+
def create_base_weights(merged_df):
|
49
55
|
"""
|
50
56
|
|
51
57
|
Args:
|
52
|
-
|
58
|
+
merged_df:
|
53
59
|
|
54
60
|
Returns:
|
55
61
|
|
56
62
|
"""
|
57
|
-
|
63
|
+
dg = merged_df[["Country Region", "geometry"]].drop_duplicates()
|
58
64
|
|
59
65
|
try:
|
60
|
-
w_base = weights.Queen.from_dataframe(
|
66
|
+
w_base = weights.Queen.from_dataframe(dg)
|
61
67
|
except Exception as e:
|
62
68
|
raise RuntimeError(f"Failed to create spatial weights: {e}")
|
63
69
|
|
@@ -65,13 +71,10 @@ def create_base_weights(dg_country):
|
|
65
71
|
index for index, neighbors in w_base.neighbors.items() if len(neighbors) == 0
|
66
72
|
]
|
67
73
|
if no_neighbors:
|
68
|
-
|
69
|
-
|
70
|
-
w_base = weights.Queen.from_dataframe(
|
71
|
-
dg_country[["Country Region", "geometry"]]
|
72
|
-
)
|
74
|
+
dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
|
75
|
+
w_base = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
|
73
76
|
|
74
|
-
return w_base,
|
77
|
+
return w_base, dg
|
75
78
|
|
76
79
|
|
77
80
|
def create_weights_for_year(dg_country, regions_with_data):
|
@@ -84,14 +87,22 @@ def create_weights_for_year(dg_country, regions_with_data):
|
|
84
87
|
Returns:
|
85
88
|
|
86
89
|
"""
|
87
|
-
|
90
|
+
dg = dg_country[dg_country["Country Region"].isin(regions_with_data)]
|
91
|
+
dg = dg.reset_index(drop=True)
|
92
|
+
|
93
|
+
wt = weights.Queen.from_dataframe(dg)
|
88
94
|
|
89
|
-
|
95
|
+
no_neighbors = [
|
96
|
+
index for index, neighbors in wt.neighbors.items() if len(neighbors) == 0
|
97
|
+
]
|
98
|
+
if no_neighbors:
|
99
|
+
dg = dg.drop(index=no_neighbors[0]).reset_index(drop=True)
|
100
|
+
wt = weights.Queen.from_dataframe(dg[["Country Region", "geometry"]])
|
90
101
|
|
91
|
-
return wt
|
102
|
+
return wt, dg
|
92
103
|
|
93
104
|
|
94
|
-
def compute_morans_i(merged_df
|
105
|
+
def compute_morans_i(merged_df):
|
95
106
|
"""
|
96
107
|
|
97
108
|
Args:
|
@@ -101,25 +112,35 @@ def compute_morans_i(merged_df, dg_country):
|
|
101
112
|
Returns:
|
102
113
|
|
103
114
|
"""
|
115
|
+
from pysal.explore import esda
|
116
|
+
|
117
|
+
# Drop any regions with missing data
|
118
|
+
merged_df = merged_df.dropna(subset=["Yield (tn per ha)"])
|
119
|
+
|
104
120
|
years = merged_df["Harvest Year"].unique()
|
105
121
|
results = {"Harvest Year": [], "Moran's I": [], "p-value": [], "Significant": []}
|
106
122
|
|
107
|
-
for year in years:
|
123
|
+
for year in tqdm(years, desc="Compute Moran's I"):
|
108
124
|
year_data = merged_df[merged_df["Harvest Year"] == year]
|
109
125
|
regions_with_data = year_data["Country Region"].unique()
|
110
126
|
year_data = year_data[year_data["Country Region"].isin(regions_with_data)]
|
111
127
|
|
112
|
-
y = year_data[["Region", "Yield (tn per ha)"]].drop_duplicates()
|
128
|
+
y = year_data[["Country Region", "Region", "Yield (tn per ha)"]].drop_duplicates()
|
129
|
+
dg_country = year_data[["Country Region", "geometry"]].drop_duplicates()
|
113
130
|
|
114
131
|
if len(y) > 1:
|
115
|
-
w = create_weights_for_year(dg_country, regions_with_data)
|
132
|
+
w, x = create_weights_for_year(dg_country, regions_with_data)
|
133
|
+
y = y[y["Country Region"].isin(x["Country Region"])]
|
116
134
|
|
117
135
|
try:
|
118
136
|
mi = esda.Moran(y["Yield (tn per ha)"].values, w, permutations=999)
|
119
137
|
except:
|
120
138
|
breakpoint()
|
121
139
|
results["Harvest Year"].append(year)
|
122
|
-
|
140
|
+
try:
|
141
|
+
results["Moran's I"].append(mi.I)
|
142
|
+
except:
|
143
|
+
breakpoint()
|
123
144
|
results["p-value"].append(mi.p_sim)
|
124
145
|
results["Significant"].append(mi.p_sim < 0.1)
|
125
146
|
else:
|
@@ -131,7 +152,7 @@ def compute_morans_i(merged_df, dg_country):
|
|
131
152
|
return pd.DataFrame(results)
|
132
153
|
|
133
154
|
|
134
|
-
def
|
155
|
+
def plot_morans_i_time_series(results_df, country, crop, dir_output):
|
135
156
|
"""
|
136
157
|
|
137
158
|
Args:
|
@@ -194,12 +215,10 @@ def compute_spatial_autocorrelation(df_results, **kwargs):
|
|
194
215
|
]
|
195
216
|
validate_inputs(df_results, required_columns)
|
196
217
|
|
197
|
-
merged_df
|
218
|
+
merged_df = preprocess_data(df_results, dg_country)
|
198
219
|
if merged_df.empty:
|
199
220
|
raise ValueError("No valid data available after preprocessing")
|
200
221
|
|
201
|
-
|
202
|
-
|
203
|
-
results_df = compute_morans_i(merged_df, dg_country)
|
222
|
+
results_df = compute_morans_i(merged_df)
|
204
223
|
|
205
|
-
|
224
|
+
plot_morans_i_time_series(results_df, country, crop, dir_output)
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|