geocif 0.1.67__tar.gz → 0.1.68__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.67/geocif.egg-info → geocif-0.1.68}/PKG-INFO +1 -1
- {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/definitions.py +8 -8
- {geocif-0.1.67 → geocif-0.1.68}/geocif/geocif.py +21 -18
- {geocif-0.1.67 → geocif-0.1.68}/geocif/geocif_runner.py +34 -35
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/feature_selection.py +15 -1
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/stats.py +1 -1
- {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/tmp.py +20 -7
- {geocif-0.1.67 → geocif-0.1.68/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.67 → geocif-0.1.68}/setup.py +1 -1
- {geocif-0.1.67 → geocif-0.1.68}/LICENSE +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/MANIFEST.in +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/README.md +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/analysis.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/constants.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/features.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/geo.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/backup/models.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/cei/indices.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/experiments.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_angola.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_madagascar.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_malawi.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_mozambique.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_south_africa.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_zambia.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/indices_runner_zimbabwe.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/logger.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/output.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/stages.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/trend.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/ml/xai.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/mm.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/aa.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/area.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/automl.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/enso.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/eval.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/gee_access.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/misc.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/play_xagg.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/reg.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/utils.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif/viz/plot.py +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/requirements.txt +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/setup.cfg +0 -0
- {geocif-0.1.67 → geocif-0.1.68}/tests/test_geocif.py +0 -0
@@ -1,11 +1,11 @@
|
|
1
1
|
PHENOLOGICAL_STAGES = [1, 2, 3]
|
2
2
|
dict_indices = {
|
3
3
|
"GD4": ["Cold", "Growing degree days (sum of Tmean > 4 C)"],
|
4
|
-
|
5
|
-
|
4
|
+
"CFD": ["Cold", "Maximum number of consecutive frost days (Tmin < 0 C)"],
|
5
|
+
"FD": ["Cold", "Number of Frost Days (Tmin < 0C)"],
|
6
6
|
"HD17": ["Cold", "Heating degree days (sum of Tmean < 17 C)"],
|
7
|
-
|
8
|
-
|
7
|
+
"ID": ["Cold", "Number of sharp Ice Days (Tmax < 0C)"],
|
8
|
+
"CSDI": ["Cold", "Cold-spell duration index"],
|
9
9
|
"TG10p": ["Cold", "Percentage of days when Tmean < 10th percentile"],
|
10
10
|
"TN10p": ["Cold", "Percentage of days when Tmin < 10th percentile"],
|
11
11
|
"TXn": ["Cold", "Minimum daily maximum temperature"],
|
@@ -70,10 +70,10 @@ dict_indices = {
|
|
70
70
|
"Compound",
|
71
71
|
"Days with TG > 75th percentile of daily mean temperature and RR >75th percentile of daily precipitation sum",
|
72
72
|
],
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
"SD": ["Snow", "Mean of daily snow depth"],
|
74
|
+
"SD1": ["Snow", "Number of days with snow depth >= 1 cm"],
|
75
|
+
"SD5cm": ["Snow", "Number of days with snow depth >= 5 cm"],
|
76
|
+
"SD50cm": ["Snow", "Number of days with snow depth >= 50 cm"],
|
77
77
|
}
|
78
78
|
|
79
79
|
dict_ndvi = {
|
@@ -598,15 +598,15 @@ class Geocif:
|
|
598
598
|
df_region[f"Median {self.target}"].values, 3
|
599
599
|
)
|
600
600
|
|
601
|
-
if f"Median {self.target} (2014-2018)" in df_region.columns:
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
if f"Median {self.target} (2013-2017)" in df_region.columns:
|
607
|
-
|
608
|
-
|
609
|
-
|
601
|
+
# if f"Median {self.target} (2014-2018)" in df_region.columns:
|
602
|
+
# df.loc[:, f"Median {self.target} (2014-2018)"] = np.around(
|
603
|
+
# df_region[f"Median {self.target} (2014-2018)"].values, 3
|
604
|
+
# )
|
605
|
+
#
|
606
|
+
# if f"Median {self.target} (2013-2017)" in df_region.columns:
|
607
|
+
# df.loc[:, f"Median {self.target} (2013-2017)"] = np.around(
|
608
|
+
# df_region[f"Median {self.target} (2013-2017)"].values, 3
|
609
|
+
# )
|
610
610
|
|
611
611
|
if self.estimate_ci:
|
612
612
|
if self.estimate_ci_for_all or self.forecast_season == self.today_year:
|
@@ -820,8 +820,8 @@ class Geocif:
|
|
820
820
|
+ self.statistics_columns
|
821
821
|
+ self.feature_names
|
822
822
|
+ [f"Median {self.target}"]
|
823
|
-
|
824
|
-
|
823
|
+
#+ [f"Median {self.target} (2014-2018)"]
|
824
|
+
#+ [f"Median {self.target} (2013-2017)"]
|
825
825
|
+ ["Region_ID"]
|
826
826
|
)
|
827
827
|
if self.check_yield_trend:
|
@@ -1011,13 +1011,13 @@ class Geocif:
|
|
1011
1011
|
df, self.all_seasons_with_yield, self.number_median_years, self.target
|
1012
1012
|
)
|
1013
1013
|
|
1014
|
-
df = fe.compute_user_median_statistics(
|
1015
|
-
|
1016
|
-
)
|
1017
|
-
|
1018
|
-
df = fe.compute_user_median_statistics(
|
1019
|
-
|
1020
|
-
)
|
1014
|
+
# df = fe.compute_user_median_statistics(
|
1015
|
+
# df, [2014, 2015, 2016, 2017, 2018]
|
1016
|
+
# )
|
1017
|
+
#
|
1018
|
+
# df = fe.compute_user_median_statistics(
|
1019
|
+
# df, [2013, 2014, 2015, 2016, 2017]
|
1020
|
+
# )
|
1021
1021
|
|
1022
1022
|
if self.median_area_as_feature:
|
1023
1023
|
df = fe.compute_median_statistics(
|
@@ -1393,6 +1393,9 @@ class Geocif:
|
|
1393
1393
|
self.dg["ADM0_NAME"].str.lower().str.replace(" ", "_") == self.country
|
1394
1394
|
]
|
1395
1395
|
|
1396
|
+
# Drop any duplicates based on Country Region column
|
1397
|
+
self.dg_country = self.dg_country.drop_duplicates(subset=["Country Region"])
|
1398
|
+
|
1396
1399
|
def read_data(self, country, crop, season):
|
1397
1400
|
"""
|
1398
1401
|
|
@@ -26,41 +26,40 @@ def loop_execute(inputs):
|
|
26
26
|
Returns:
|
27
27
|
|
28
28
|
"""
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
obj.execute()
|
29
|
+
from pycallgraph2 import Config, PyCallGraph, GlobbingFilter
|
30
|
+
from pycallgraph2.output import GraphvizOutput
|
31
|
+
|
32
|
+
graphviz = GraphvizOutput()
|
33
|
+
graphviz.output_file = "geocif_visualization.png"
|
34
|
+
plt.rcParams["figure.dpi"] = 600
|
35
|
+
config = Config(max_depth=5)
|
36
|
+
config.trace_filter = GlobbingFilter(
|
37
|
+
exclude=[
|
38
|
+
"pycallgraph.*",
|
39
|
+
]
|
40
|
+
)
|
41
|
+
|
42
|
+
with PyCallGraph(output=graphviz, config=config):
|
43
|
+
project_name, country, crop, season, model, logger, parser, index = inputs
|
44
|
+
|
45
|
+
logger.info("=====================================================")
|
46
|
+
logger.info(f"\tStarting GEOCIF: {country} {crop} {season} {model}")
|
47
|
+
logger.info("=====================================================")
|
48
|
+
|
49
|
+
obj = geocif.Geocif(logger=logger,
|
50
|
+
parser=parser,
|
51
|
+
project_name=project_name)
|
52
|
+
obj.read_data(country, crop, season)
|
53
|
+
|
54
|
+
# Store config file in database, only execute this for
|
55
|
+
# the first iteration of the loop
|
56
|
+
if index == 0:
|
57
|
+
output.config_to_db(obj.db_path, obj.parser, obj.today)
|
58
|
+
|
59
|
+
# Setup metadata and run ML code
|
60
|
+
obj.setup(season, model)
|
61
|
+
if obj.simulation_stages:
|
62
|
+
obj.execute()
|
64
63
|
|
65
64
|
|
66
65
|
def gather_inputs(parser):
|
@@ -188,10 +188,24 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
|
|
188
188
|
|
189
189
|
# Get the selected feature names
|
190
190
|
selected_features = X.columns[selected_features].tolist()
|
191
|
+
elif method == "lasso":
|
192
|
+
from sklearn.linear_model import LassoLarsCV
|
193
|
+
from sklearn.feature_selection import SelectFromModel
|
194
|
+
|
195
|
+
# Fit Lasso model (L1 regularization) to perform feature selection
|
196
|
+
lasso = LassoLarsCV(cv=5)
|
197
|
+
lasso.fit(X, y)
|
198
|
+
|
199
|
+
# Use SelectFromModel to remove features with zero coefficients
|
200
|
+
selector = SelectFromModel(lasso, prefit=True)
|
201
|
+
|
202
|
+
# Get the selected features
|
203
|
+
selected_features = X.columns[selector.get_support()].tolist()
|
204
|
+
print(selected_features)
|
191
205
|
elif method == "BorutaPy":
|
192
206
|
from boruta import BorutaPy
|
193
207
|
|
194
|
-
selector = BorutaPy(forest, n_estimators="auto", random_state=42)
|
208
|
+
selector = BorutaPy(forest, n_estimators="auto", random_state=42, verbose=0)
|
195
209
|
selector.fit(X.values, y.values)
|
196
210
|
selected_features_mask = selector.support_
|
197
211
|
selected_features = X.columns[selected_features_mask].tolist()
|
@@ -203,7 +203,7 @@ def add_statistics(
|
|
203
203
|
fn = "illinois.csv"
|
204
204
|
elif country == "Ethiopia":
|
205
205
|
# HACK
|
206
|
-
fn = "
|
206
|
+
fn = "adm_crop_production.csv"
|
207
207
|
else:
|
208
208
|
fn = "adm_crop_production.csv"
|
209
209
|
df_fewsnet = pd.read_csv(dir_stats / fn, low_memory=False)
|
@@ -1,6 +1,4 @@
|
|
1
1
|
import geopandas as gpd
|
2
|
-
import pandas as pd
|
3
|
-
import matplotlib.pyplot as plt
|
4
2
|
import palettable as pal
|
5
3
|
import matplotlib.colors as mcolors
|
6
4
|
|
@@ -9,7 +7,7 @@ import glob
|
|
9
7
|
import os
|
10
8
|
|
11
9
|
# 1. Specify the directory containing your .dta files:
|
12
|
-
data_dir = r"
|
10
|
+
data_dir = r"."
|
13
11
|
|
14
12
|
# 2. Use glob to find all .dta files in that directory:
|
15
13
|
dta_files = glob.glob(os.path.join(data_dir, "*.dta"))
|
@@ -20,6 +18,13 @@ dataframes = [pd.read_stata(f) for f in dta_files]
|
|
20
18
|
# 4. Concatenate them all into one DataFrame (row-wise):
|
21
19
|
merged_df = pd.concat(dataframes, ignore_index=True)
|
22
20
|
|
21
|
+
# Replace null values in PROD98CQ with those in PROD columns
|
22
|
+
merged_df['PROD98CQ'] = merged_df['PROD98CQ'].fillna(merged_df['PROD'])
|
23
|
+
merged_df['YEAR'] = merged_df['YEAR'].fillna(merged_df['year'])
|
24
|
+
|
25
|
+
# Drop rows where AREAH is 0
|
26
|
+
merged_df = merged_df[merged_df['AREAH'] != 0]
|
27
|
+
|
23
28
|
merged_df['ZONE'] = merged_df['ZONE'].astype(int)
|
24
29
|
merged_df['DIST'] = merged_df['DIST'].astype(int)
|
25
30
|
|
@@ -36,7 +41,7 @@ merged_df['W_CODE'] = '7' + merged_df['W_CODE']
|
|
36
41
|
merged_df['W_CODE'] = merged_df['W_CODE'].str.replace('.0', '')
|
37
42
|
merged_df['W_CODE'] = merged_df['W_CODE'].astype(int)
|
38
43
|
|
39
|
-
dg = gpd.read_file(r"
|
44
|
+
dg = gpd.read_file(r"wolayita_dissolved.shp")
|
40
45
|
dg = dg[['W_CODE', 'W_NAME']]
|
41
46
|
|
42
47
|
# Merge the two dataframes on W_CODE
|
@@ -48,8 +53,8 @@ merged_df = merged_df.dropna(subset=['PROD98CQ', 'AREAH'])
|
|
48
53
|
# Compute yield column
|
49
54
|
merged_df['yield'] = merged_df['PROD98CQ'] / merged_df['AREAH']
|
50
55
|
|
51
|
-
# create a new dataframe which computes average yield by W_NAME for each year
|
52
|
-
df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR'])['yield']
|
56
|
+
# Create a new dataframe holding the average yield by W_NAME for each year, computed as a weighted average using the FWEIGHT column
|
57
|
+
df_avg_yield = merged_df.groupby(['W_NAME', 'YEAR']).apply(lambda x: np.average(x['yield'], weights=x['FWEIGHT'])).reset_index(name='yield')
|
53
58
|
|
54
59
|
# Change W_NAME column to title case
|
55
60
|
df_avg_yield['W_NAME'] = df_avg_yield['W_NAME'].str.title()
|
@@ -64,7 +69,15 @@ df_avg_yield = df_avg_yield.pivot(index='W_NAME', columns='YEAR', values='yield'
|
|
64
69
|
df_avg_yield.index.name = None
|
65
70
|
df_avg_yield.columns.name = None
|
66
71
|
|
67
|
-
df_avg_yield.to_csv('
|
72
|
+
df_avg_yield.to_csv('wolayita_yields_v8.csv')
|
73
|
+
breakpoint()
|
74
|
+
# Compare wolayita_yields_v2.csv with wolayita_yields.csv
|
75
|
+
# 1. Load the two CSV files
|
76
|
+
df_v1 = pd.read_csv('wolayita_yields.csv')
|
77
|
+
df_v2 = pd.read_csv('wolayita_yields_v2.csv')
|
78
|
+
|
79
|
+
# 2. Check if the two DataFrames are equal
|
80
|
+
print(df_v1.equals(df_v2))
|
68
81
|
|
69
82
|
breakpoint()
|
70
83
|
# 5. (Optional) Inspect the merged DataFrame
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|