geocif 0.1.80__tar.gz → 0.1.82__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.80/geocif.egg-info → geocif-0.1.82}/PKG-INFO +1 -1
- {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/indices.py +36 -24
- {geocif-0.1.80 → geocif-0.1.82}/geocif/geocif.py +37 -11
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner.py +2 -2
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/embedding.py +4 -1
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/stages.py +5 -0
- {geocif-0.1.80 → geocif-0.1.82/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.80 → geocif-0.1.82}/setup.py +1 -1
- {geocif-0.1.80 → geocif-0.1.82}/LICENSE +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/MANIFEST.in +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/README.md +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/analysis.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/constants.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/features.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/geo.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/models.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/experiments.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_angola.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_madagascar.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_malawi.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_mozambique.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_south_africa.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_zambia.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_zimbabwe.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/logger.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/feature_selection.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/output.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/stats.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/trend.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/xai.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/mm.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/aa.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/area.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/automl.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/enso.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/eval.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/gee_access.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/misc.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/play_xagg.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/reg.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/wolayita_maize_mask.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/utils.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/gt.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/plot.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/tmp.py +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/requirements.txt +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/setup.cfg +0 -0
- {geocif-0.1.80 → geocif-0.1.82}/tests/test_geocif.py +0 -0
geocif/cei/indices.py

@@ -158,9 +158,14 @@ def adjust_dataframes(df: pd.DataFrame) -> pd.DataFrame:
     earliest_year = df["time"].dt.year.min()
     desired_start_year = earliest_year + 1
     desired_start_date_dynamic = pd.Timestamp(f"{desired_start_year}-01-01")
+
+    # Calculate the difference between the earliest date in the dataset and the desired start date
     min_date_new = df["time"].min()
     date_difference_dynamic = desired_start_date_dynamic - min_date_new
+
+    # Adjust all dates in the 'time' column forward by the calculated difference
     df["time"] = df["time"] + date_difference_dynamic
+
     return df
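The comments added here document shift logic that was already in place: every timestamp is moved forward by one fixed offset so the series starts on January 1 of the year after the earliest observation. A minimal standalone sketch of the same idea, using made-up data rather than anything from the package:

```python
import pandas as pd

# Hypothetical frame with a datetime "time" column
df = pd.DataFrame({"time": pd.date_range("2001-06-15", periods=3, freq="D")})

earliest_year = df["time"].dt.year.min()                    # 2001
desired_start = pd.Timestamp(f"{earliest_year + 1}-01-01")  # 2002-01-01

# Shift every date forward by the same offset so the series begins at desired_start
offset = desired_start - df["time"].min()
df["time"] = df["time"] + offset
print(df["time"].min())  # 2002-01-01 00:00:00
```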
@@ -214,8 +219,9 @@ def get_icclim_dates(
     # end_br: latest date - 2 years
     end_br = str(df_all_years_ix.index[-1][2] - relativedelta(years=2))

-    start_tr = np.datetime_as_string(df_harvest_year_ix.index[0][2])
-    end_tr = np.datetime_as_string(df_harvest_year_ix.index[-1][2])
+    start_tr = np.datetime_as_string(df_harvest_year_ix.index[0][2].to_datetime64())
+    end_tr = np.datetime_as_string(df_harvest_year_ix.index[-1][2].to_datetime64())
+
     return start_br, end_br, start_tr, end_tr
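The `.to_datetime64()` calls convert a pandas `Timestamp` into the NumPy `datetime64` scalar that `np.datetime_as_string` expects; passing a `Timestamp` directly raises a TypeError. A small sketch of the difference:

```python
import numpy as np
import pandas as pd

ts = pd.Timestamp("2020-05-01")

# np.datetime_as_string(ts) raises TypeError: it wants a numpy datetime64,
# not a pandas Timestamp. Converting first makes it work:
print(np.datetime_as_string(ts.to_datetime64()))
# -> '2020-05-01T00:00:00.000000000'
```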
@@ -244,8 +250,11 @@ def compute_indices(
     df_time_period = adjust_dataframes(df_time_period)
     df_base_period = adjust_dataframes(df_base_period)

-
-
+    try:
+        dx, vals_ix = df_to_xarray(df_base_period)
+        start_br, end_br, start_tr, end_tr = get_icclim_dates(vals_ix, df_time_period.set_index(["lat", "lon", "time"]))
+    except:
+        breakpoint()

     # For seasonal indices, slice_mode is used, but for SPI indices it fails
     slice_mode = (
@@ -277,6 +286,7 @@ def compute_indices(
         "Error computing %s for %s to %s: %s",
         index_name, start_tr, end_tr, e
     )
+    breakpoint()

     return ds
@@ -319,11 +329,11 @@ METHOD_TO_COLUMN = {
     "full_season": "crop_cal",
     "fraction_season": "fraction_season",
     "dekad": "dekad",
-    "dekad_r": "
+    "dekad_r": "dekad_r",
     "biweekly": "biweekly",
-    "biweekly_r": "
+    "biweekly_r": "biweekly_r",
     "monthly": "monthly",
-    "monthly_r": "
+    "monthly_r": "monthly_r"
 }
@@ -545,7 +555,7 @@ class CEIs:
         if not col:
             raise ValueError(f"Unknown method: {self.method}")

-        stages =
+        stages = df[col].unique()
         valid_stages = None

         if self.method == "phenological_stages":
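With the `METHOD_TO_COLUMN` values and the `stages` assignment restored, the method name now resolves to a real column whose distinct values become the stages. A tiny illustration of that lookup on a hypothetical frame:

```python
import pandas as pd

METHOD_TO_COLUMN = {"dekad": "dekad", "dekad_r": "dekad_r"}  # excerpt of the mapping

df = pd.DataFrame({"dekad_r": [1, 1, 2, 3]})
col = METHOD_TO_COLUMN.get("dekad_r")
stages = df[col].unique()  # distinct stage labels in the column
print(stages)              # [1 2 3]
```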
@@ -636,22 +646,24 @@ class CEIs:
         )

         # 1) ICCLIM-based indices
- [… 16 removed lines; their content was not preserved in the diff view …]
+        try:
+            for index_name, (index_type, index_details) in di.dict_indices.items():
+                ds = compute_indices(df_time_period, df_base_period, index_name)
+                if ds:
+                    df_out = ds.to_dataframe().reset_index()
+                    df_processed = self.process_row(
+                        df_out,
+                        df_harvest_year_region,
+                        extended_stage,
+                        key,
+                        index_name,
+                        index_type,
+                        index_details
+                    )
+                    if not df_processed.empty:
+                        frames_group.append(df_processed)
+        except:
+            breakpoint()
         # 2) EO indices (NDVI, ESI, GCVI, H-INDEX, etc.)
         for eo_var in ["GCVI", "NDVI", "ESI4WK", "H-INDEX"]:
             df_eo = self.compute_eo_indices(df_time_period, df_harvest_year_region, eo_var, key, extended_stage)
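The bare `except:` with `breakpoint()` around the ICCLIM loop drops into `pdb` on any failure, which looks like temporary debugging scaffolding rather than production error handling. For contrast, a sketch of a more conventional pattern (our example, not what the package does) that logs the traceback and re-raises:

```python
import logging

logger = logging.getLogger(__name__)

def risky_step():
    raise ValueError("example failure")  # stand-in for the ICCLIM index loop

try:
    risky_step()
except Exception:
    # Log the full traceback, then re-raise so callers still see the error
    logger.exception("ICCLIM index computation failed")
    raise
```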
geocif/geocif.py

@@ -945,11 +945,12 @@ class Geocif:
         parts = all_cei_columns[-1].split("_")
         cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])

-        # For each region, find the column with the longest string in cei_column
-        group_by = ["Region"]
-        groups = df.groupby(group_by)
         if self.use_cumulative_features:
             frames = []
+            # For each region, find the column with the longest string in cei_column
+            group_by = ["Region"]
+            groups = df.groupby(group_by)
+
             for name, group in groups:
                 # Drop columns with all NaNs
                 group.dropna(axis=1, how="all", inplace=True)
@@ -1019,26 +1020,45 @@ class Geocif:
         # Drop those columns

         df = df.drop(columns=cols_to_drop)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<0>", dupes)
         # Hack: If
         # Change column name
         # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
         df = stages.update_feature_names(df, self.method)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<111>", dupes)
         all_cei_columns = self.get_cei_column_names(df)
         # Fill in any missing values with 0
         df.loc[:, all_cei_columns].fillna(0, inplace=True)
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<1>", dupes)

         df = fe.compute_last_year_yield(df, self.target)
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<2>", dupes)
         df = fe.compute_median_statistics(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<3>", dupes)
         df = fe.compute_user_median_statistics(df, range(2018, 2023))

         df = fe.compute_user_median_statistics(df, range(2013, 2018))
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("<4>", dupes)
         if self.median_area_as_feature:
             df = fe.compute_median_statistics(
                 df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
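The same four-line `Counter` probe is pasted after each feature-engineering step (tagged `<0>`, `<111>`, `<1>` through `<4>`, and `5` below) to trace where duplicate `AUC_ESI4WK` column labels creep in. A sketch of how such a probe can be written once as a helper; the function name is ours, not the package's:

```python
from collections import Counter
import pandas as pd

def report_duplicate_columns(df: pd.DataFrame, like: str, tag: str) -> None:
    """Print column labels containing `like` that occur more than once."""
    cols = [c for c in df.columns if like in str(c)]
    dupes = {k: v for k, v in Counter(cols).items() if v > 1}
    print(tag, dupes)

# Hypothetical frame with a duplicated column label
df = pd.DataFrame([[1, 2, 3]], columns=["AUC_ESI4WK_a", "AUC_ESI4WK_a", "x"])
report_duplicate_columns(df, "AUC_ESI4WK", "<0>")  # <0> {'AUC_ESI4WK_a': 2}
```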
@@ -1053,7 +1073,10 @@ class Geocif:
         df = fe.compute_analogous_yield(
             df, self.all_seasons_with_yield, self.number_median_years, self.target
         )
-
+        from collections import Counter
+        esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
+        dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
+        print("5", dupes)
         # Create Region_ID column based on Region column category code
         df["Region"] = df["Region"].astype("category")
         if self.cluster_strategy == "single":
@@ -1067,7 +1090,7 @@ class Geocif:

         # Region_ID should be type category
         df["Region_ID"] = df["Region_ID"].astype("category")
-
+        breakpoint()
         return df

     def execute(self):
@@ -1474,7 +1497,8 @@ class Geocif:
         assert all_files, f"No files found in {_dir_country} with {file_name}"

         self.df_inputs = pd.concat(
-            (pd.read_csv(f) for f in all_files
+            (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
+            ignore_index=True
         )

         self.df_inputs = stats.add_statistics(
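The rewritten `pd.concat` call switches `pd.read_csv` to the multithreaded Arrow parser (`engine="pyarrow"`, available in pandas 1.4+ when the `pyarrow` package is installed) and wraps the file list in `tqdm` for a progress bar. A self-contained sketch of the pattern:

```python
import pandas as pd
from pathlib import Path
from tqdm import tqdm

# Create two tiny CSVs so the example runs anywhere
for name in ("a.csv", "b.csv"):
    Path(name).write_text("x,y\n1,2\n")

all_files = ["a.csv", "b.csv"]
df = pd.concat(
    (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
    ignore_index=True,  # renumber rows 0..n-1 instead of repeating each file's index
)
print(len(df))  # 2 (one data row per file)
```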
@@ -1486,7 +1510,9 @@ class Geocif:
             [self.target] + self.statistics_columns,
             self.method,
         )
+
         """ Add information on starting and ending time period for each stage"""
+        self.logger.info("Adding starting and ending time period for each stage")
         self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)

         self.df_inputs.to_csv(file, index=False)
geocif/indices_runner.py

@@ -165,7 +165,7 @@ class cei_runner(base.BaseGeo):
         combinations = [
             i
             for i in combinations
-            if "
+            if "ukraine" in i[3]
             # or "lesotho_maize" in i[3] or
             # # "namibia_" in i[2] or
             # "united_republic_of_tanzania_maize" in i[3]
@@ -179,7 +179,7 @@ class cei_runner(base.BaseGeo):
         ]
         # "malawi" in i[2]]

-        if
+        if False and self.do_parallel:
             num_cpu = int(cpu_count() * 0.6)
             with Pool(num_cpu) as p:
                 for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
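Note that `if False and self.do_parallel:` short-circuits to `False`, so the parallel branch is effectively disabled in this release. The branch itself uses the standard `multiprocessing` fan-out; a runnable sketch with a stand-in worker:

```python
from multiprocessing import Pool, cpu_count

def process(x):
    return x * x  # stand-in for indices.process

if __name__ == "__main__":
    combinations = [1, 2, 3, 4]
    num_cpu = max(1, int(cpu_count() * 0.6))
    with Pool(num_cpu) as p:
        # imap_unordered yields results as workers finish, in arbitrary order
        for result in p.imap_unordered(process, combinations):
            print(result)
```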
geocif/ml/embedding.py

@@ -32,7 +32,10 @@ def _compute_correlations(X, y):
         f_series = X[feature]

         # Ignore NaN values in either y or f_series
-
+        try:
+            mask = ~(np.isnan(y) | np.isnan(f_series))
+        except:
+            breakpoint()
         y_filtered = y[mask]
         f_series_filtered = f_series[mask]
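`np.isnan` only accepts numeric dtypes; on an object-dtype column it raises a TypeError, which is presumably the failure mode the new `try`/`except` guards (here by stopping in `pdb` via `breakpoint()`). A sketch of the behavior, with pandas' `isna` as the dtype-agnostic alternative:

```python
import numpy as np
import pandas as pd

y = pd.Series([1.0, np.nan, 3.0])
f_object = pd.Series([1.0, None, "3"], dtype=object)

mask_ok = ~np.isnan(y)           # fine on float dtype
try:
    np.isnan(f_object)           # TypeError: isnan not supported for object dtype
except TypeError:
    mask_obj = ~f_object.isna()  # .isna() / pd.isna handles any dtype
print(mask_ok.tolist(), mask_obj.tolist())
```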
geocif/ml/stages.py

@@ -268,6 +268,11 @@ def update_feature_names(df, method):
         # Saving the result in the dictionary
         stages_info[element] = (cei, start_stage, end_stage, new_column_name)

+    # Check if any duplicates exist in the dictionary
+    if len(stages_info) != len(set(stages_info.values())):
+        breakpoint()
+        raise ValueError(f"Duplicate stage information found for {element}")
+    breakpoint()
     # For each column in df, check if it exists in stages_info, and
     # replace it with the new column name
     # Precompute the rename mapping outside the loop
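The new guard compares the number of dictionary entries against the number of distinct values: since keys are unique, any mismatch means two stage keys produced the same renamed column. A minimal illustration with hypothetical values:

```python
stages_info = {"a": (1, 2), "b": (1, 2), "c": (3, 4)}

# Keys are unique, so fewer distinct values than entries means duplicate values
has_duplicates = len(stages_info) != len(set(stages_info.values()))
print(has_duplicates)  # True: "a" and "b" map to the same tuple
```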