geocif 0.1.80__tar.gz → 0.1.82__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.80/geocif.egg-info → geocif-0.1.82}/PKG-INFO +1 -1
  2. {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/indices.py +36 -24
  3. {geocif-0.1.80 → geocif-0.1.82}/geocif/geocif.py +37 -11
  4. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner.py +2 -2
  5. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/embedding.py +4 -1
  6. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/stages.py +5 -0
  7. {geocif-0.1.80 → geocif-0.1.82/geocif.egg-info}/PKG-INFO +1 -1
  8. {geocif-0.1.80 → geocif-0.1.82}/setup.py +1 -1
  9. {geocif-0.1.80 → geocif-0.1.82}/LICENSE +0 -0
  10. {geocif-0.1.80 → geocif-0.1.82}/MANIFEST.in +0 -0
  11. {geocif-0.1.80 → geocif-0.1.82}/README.md +0 -0
  12. {geocif-0.1.80 → geocif-0.1.82}/geocif/__init__.py +0 -0
  13. {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/__init__.py +0 -0
  14. {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/geoagmet.py +0 -0
  15. {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/plot.py +0 -0
  16. {geocif-0.1.80 → geocif-0.1.82}/geocif/agmet/utils.py +0 -0
  17. {geocif-0.1.80 → geocif-0.1.82}/geocif/analysis.py +0 -0
  18. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/__init__.py +0 -0
  19. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/constants.py +0 -0
  20. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/features.py +0 -0
  21. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/geo.py +0 -0
  22. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/geocif.py +0 -0
  23. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/metadata.py +0 -0
  24. {geocif-0.1.80 → geocif-0.1.82}/geocif/backup/models.py +0 -0
  25. {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/__init__.py +0 -0
  26. {geocif-0.1.80 → geocif-0.1.82}/geocif/cei/definitions.py +0 -0
  27. {geocif-0.1.80 → geocif-0.1.82}/geocif/experiments.py +0 -0
  28. {geocif-0.1.80 → geocif-0.1.82}/geocif/geocif_runner.py +0 -0
  29. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_angola.py +0 -0
  30. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_madagascar.py +0 -0
  31. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_malawi.py +0 -0
  32. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_mozambique.py +0 -0
  33. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_south_africa.py +0 -0
  34. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_zambia.py +0 -0
  35. {geocif-0.1.80 → geocif-0.1.82}/geocif/indices_runner_zimbabwe.py +0 -0
  36. {geocif-0.1.80 → geocif-0.1.82}/geocif/logger.py +0 -0
  37. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/feature_engineering.py +0 -0
  40. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/feature_selection.py +0 -0
  41. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/outliers.py +0 -0
  42. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/outlook.py +0 -0
  43. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/output.py +0 -0
  44. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/spatial_autocorrelation.py +0 -0
  45. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/stats.py +0 -0
  46. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/trainers.py +0 -0
  47. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/trend.py +0 -0
  48. {geocif-0.1.80 → geocif-0.1.82}/geocif/ml/xai.py +0 -0
  49. {geocif-0.1.80 → geocif-0.1.82}/geocif/mm.py +0 -0
  50. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/__init__.py +0 -0
  51. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/aa.py +0 -0
  52. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/area.py +0 -0
  53. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/automl.py +0 -0
  54. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/download_esi.py +0 -0
  55. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/enso.py +0 -0
  56. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/eval.py +0 -0
  57. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/gamtest.py +0 -0
  58. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/gee_access.py +0 -0
  59. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/misc.py +0 -0
  60. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/play_xagg.py +0 -0
  61. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/reg.py +0 -0
  62. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/sustain.py +0 -0
  63. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/test_catboost.py +0 -0
  64. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp.py +0 -0
  65. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp2.py +0 -0
  66. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp3.py +0 -0
  67. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp4.py +0 -0
  68. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/tmp5.py +0 -0
  69. {geocif-0.1.80 → geocif-0.1.82}/geocif/playground/wolayita_maize_mask.py +0 -0
  70. {geocif-0.1.80 → geocif-0.1.82}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.1.80 → geocif-0.1.82}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.1.80 → geocif-0.1.82}/geocif/utils.py +0 -0
  73. {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/gt.py +0 -0
  75. {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/plot.py +0 -0
  76. {geocif-0.1.80 → geocif-0.1.82}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.80 → geocif-0.1.82}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.80 → geocif-0.1.82}/requirements.txt +0 -0
  82. {geocif-0.1.80 → geocif-0.1.82}/setup.cfg +0 -0
  83. {geocif-0.1.80 → geocif-0.1.82}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.80
3
+ Version: 0.1.82
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -158,9 +158,14 @@ def adjust_dataframes(df: pd.DataFrame) -> pd.DataFrame:
158
158
  earliest_year = df["time"].dt.year.min()
159
159
  desired_start_year = earliest_year + 1
160
160
  desired_start_date_dynamic = pd.Timestamp(f"{desired_start_year}-01-01")
161
+
162
+ # Calculate the difference between the earliest date in the dataset and the desired start date
161
163
  min_date_new = df["time"].min()
162
164
  date_difference_dynamic = desired_start_date_dynamic - min_date_new
165
+
166
+ # Adjust all dates in the 'time' column forward by the calculated difference
163
167
  df["time"] = df["time"] + date_difference_dynamic
168
+
164
169
  return df
165
170
 
166
171
 
@@ -214,8 +219,9 @@ def get_icclim_dates(
214
219
  # end_br: latest date - 2 years
215
220
  end_br = str(df_all_years_ix.index[-1][2] - relativedelta(years=2))
216
221
 
217
- start_tr = np.datetime_as_string(df_harvest_year_ix.index[0][2])
218
- end_tr = np.datetime_as_string(df_harvest_year_ix.index[-1][2])
222
+ start_tr = np.datetime_as_string(df_harvest_year_ix.index[0][2].to_datetime64())
223
+ end_tr = np.datetime_as_string(df_harvest_year_ix.index[-1][2].to_datetime64())
224
+
219
225
  return start_br, end_br, start_tr, end_tr
220
226
 
221
227
 
@@ -244,8 +250,11 @@ def compute_indices(
244
250
  df_time_period = adjust_dataframes(df_time_period)
245
251
  df_base_period = adjust_dataframes(df_base_period)
246
252
 
247
- dx, vals_ix = df_to_xarray(df_base_period)
248
- start_br, end_br, start_tr, end_tr = get_icclim_dates(vals_ix, df_time_period.set_index(["lat", "lon", "time"]))
253
+ try:
254
+ dx, vals_ix = df_to_xarray(df_base_period)
255
+ start_br, end_br, start_tr, end_tr = get_icclim_dates(vals_ix, df_time_period.set_index(["lat", "lon", "time"]))
256
+ except:
257
+ breakpoint()
249
258
 
250
259
  # For seasonal indices, slice_mode is used, but for SPI indices it fails
251
260
  slice_mode = (
@@ -277,6 +286,7 @@ def compute_indices(
277
286
  "Error computing %s for %s to %s: %s",
278
287
  index_name, start_tr, end_tr, e
279
288
  )
289
+ breakpoint()
280
290
 
281
291
  return ds
282
292
 
@@ -319,11 +329,11 @@ METHOD_TO_COLUMN = {
319
329
  "full_season": "crop_cal",
320
330
  "fraction_season": "fraction_season",
321
331
  "dekad": "dekad",
322
- "dekad_r": "dekad",
332
+ "dekad_r": "dekad_r",
323
333
  "biweekly": "biweekly",
324
- "biweekly_r": "biweekly",
334
+ "biweekly_r": "biweekly_r",
325
335
  "monthly": "monthly",
326
- "monthly_r": "monthly"
336
+ "monthly_r": "monthly_r"
327
337
  }
328
338
 
329
339
 
@@ -545,7 +555,7 @@ class CEIs:
545
555
  if not col:
546
556
  raise ValueError(f"Unknown method: {self.method}")
547
557
 
548
- stages = sorted(df[col].unique())
558
+ stages = df[col].unique()
549
559
  valid_stages = None
550
560
 
551
561
  if self.method == "phenological_stages":
@@ -636,22 +646,24 @@ class CEIs:
636
646
  )
637
647
 
638
648
  # 1) ICCLIM-based indices
639
- for index_name, (index_type, index_details) in di.dict_indices.items():
640
- ds = compute_indices(df_time_period, df_base_period, index_name)
641
- if ds:
642
- df_out = ds.to_dataframe().reset_index()
643
- df_processed = self.process_row(
644
- df_out,
645
- df_harvest_year_region,
646
- extended_stage,
647
- key,
648
- index_name,
649
- index_type,
650
- index_details
651
- )
652
- if not df_processed.empty:
653
- frames_group.append(df_processed)
654
-
649
+ try:
650
+ for index_name, (index_type, index_details) in di.dict_indices.items():
651
+ ds = compute_indices(df_time_period, df_base_period, index_name)
652
+ if ds:
653
+ df_out = ds.to_dataframe().reset_index()
654
+ df_processed = self.process_row(
655
+ df_out,
656
+ df_harvest_year_region,
657
+ extended_stage,
658
+ key,
659
+ index_name,
660
+ index_type,
661
+ index_details
662
+ )
663
+ if not df_processed.empty:
664
+ frames_group.append(df_processed)
665
+ except:
666
+ breakpoint()
655
667
  # 2) EO indices (NDVI, ESI, GCVI, H-INDEX, etc.)
656
668
  for eo_var in ["GCVI", "NDVI", "ESI4WK", "H-INDEX"]:
657
669
  df_eo = self.compute_eo_indices(df_time_period, df_harvest_year_region, eo_var, key, extended_stage)
@@ -945,11 +945,12 @@ class Geocif:
945
945
  parts = all_cei_columns[-1].split("_")
946
946
  cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
947
947
 
948
- # For each region, find the column with the longest string in cei_column
949
- group_by = ["Region"]
950
- groups = df.groupby(group_by)
951
948
  if self.use_cumulative_features:
952
949
  frames = []
950
+ # For each region, find the column with the longest string in cei_column
951
+ group_by = ["Region"]
952
+ groups = df.groupby(group_by)
953
+
953
954
  for name, group in groups:
954
955
  # Drop columns with all NaNs
955
956
  group.dropna(axis=1, how="all", inplace=True)
@@ -1019,26 +1020,45 @@ class Geocif:
1019
1020
  # Drop those columns
1020
1021
 
1021
1022
  df = df.drop(columns=cols_to_drop)
1022
-
1023
+ from collections import Counter
1024
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1025
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1026
+ print("<0>", dupes)
1023
1027
  # Hack: If
1024
1028
  # Change column name
1025
1029
  # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
1026
1030
  df = stages.update_feature_names(df, self.method)
1027
-
1031
+ from collections import Counter
1032
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1033
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1034
+ print("<111>", dupes)
1028
1035
  all_cei_columns = self.get_cei_column_names(df)
1029
1036
  # Fill in any missing values with 0
1030
1037
  df.loc[:, all_cei_columns].fillna(0, inplace=True)
1038
+ from collections import Counter
1039
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1040
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1041
+ print("<1>", dupes)
1031
1042
 
1032
1043
  df = fe.compute_last_year_yield(df, self.target)
1033
-
1044
+ from collections import Counter
1045
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1046
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1047
+ print("<2>", dupes)
1034
1048
  df = fe.compute_median_statistics(
1035
1049
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1036
1050
  )
1037
-
1051
+ from collections import Counter
1052
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1053
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1054
+ print("<3>", dupes)
1038
1055
  df = fe.compute_user_median_statistics(df, range(2018, 2023))
1039
1056
 
1040
1057
  df = fe.compute_user_median_statistics(df, range(2013, 2018))
1041
-
1058
+ from collections import Counter
1059
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1060
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1061
+ print("<4>", dupes)
1042
1062
  if self.median_area_as_feature:
1043
1063
  df = fe.compute_median_statistics(
1044
1064
  df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1053,7 +1073,10 @@ class Geocif:
1053
1073
  df = fe.compute_analogous_yield(
1054
1074
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1055
1075
  )
1056
-
1076
+ from collections import Counter
1077
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1078
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1079
+ print("5", dupes)
1057
1080
  # Create Region_ID column based on Region column category code
1058
1081
  df["Region"] = df["Region"].astype("category")
1059
1082
  if self.cluster_strategy == "single":
@@ -1067,7 +1090,7 @@ class Geocif:
1067
1090
 
1068
1091
  # Region_ID should be type category
1069
1092
  df["Region_ID"] = df["Region_ID"].astype("category")
1070
-
1093
+ breakpoint()
1071
1094
  return df
1072
1095
 
1073
1096
  def execute(self):
@@ -1474,7 +1497,8 @@ class Geocif:
1474
1497
  assert all_files, f"No files found in {_dir_country} with {file_name}"
1475
1498
 
1476
1499
  self.df_inputs = pd.concat(
1477
- (pd.read_csv(f) for f in all_files), ignore_index=True
1500
+ (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
1501
+ ignore_index=True
1478
1502
  )
1479
1503
 
1480
1504
  self.df_inputs = stats.add_statistics(
@@ -1486,7 +1510,9 @@ class Geocif:
1486
1510
  [self.target] + self.statistics_columns,
1487
1511
  self.method,
1488
1512
  )
1513
+
1489
1514
  """ Add information on starting and ending time period for each stage"""
1515
+ self.logger.info("Adding starting and ending time period for each stage")
1490
1516
  self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)
1491
1517
 
1492
1518
  self.df_inputs.to_csv(file, index=False)
@@ -165,7 +165,7 @@ class cei_runner(base.BaseGeo):
165
165
  combinations = [
166
166
  i
167
167
  for i in combinations
168
- if "ukraine_maize" in i[3]
168
+ if "ukraine" in i[3]
169
169
  # or "lesotho_maize" in i[3] or
170
170
  # # "namibia_" in i[2] or
171
171
  # "united_republic_of_tanzania_maize" in i[3]
@@ -179,7 +179,7 @@ class cei_runner(base.BaseGeo):
179
179
  ]
180
180
  # "malawi" in i[2]]
181
181
 
182
- if True or self.do_parallel:
182
+ if False and self.do_parallel:
183
183
  num_cpu = int(cpu_count() * 0.6)
184
184
  with Pool(num_cpu) as p:
185
185
  for i, _ in enumerate(p.imap_unordered(indices.process, combinations)):
@@ -32,7 +32,10 @@ def _compute_correlations(X, y):
32
32
  f_series = X[feature]
33
33
 
34
34
  # Ignore NaN values in either y or f_series
35
- mask = ~(np.isnan(y) | np.isnan(f_series))
35
+ try:
36
+ mask = ~(np.isnan(y) | np.isnan(f_series))
37
+ except:
38
+ breakpoint()
36
39
  y_filtered = y[mask]
37
40
  f_series_filtered = f_series[mask]
38
41
 
@@ -268,6 +268,11 @@ def update_feature_names(df, method):
268
268
  # Saving the result in the dictionary
269
269
  stages_info[element] = (cei, start_stage, end_stage, new_column_name)
270
270
 
271
+ # Check if any duplicates exist in the dictionary
272
+ if len(stages_info) != len(set(stages_info.values())):
273
+ breakpoint()
274
+ raise ValueError(f"Duplicate stage information found for {element}")
275
+ breakpoint()
271
276
  # For each column in df, check if it exists in stages_info, and
272
277
  # replace it with the new column name
273
278
  # Precompute the rename mapping outside the loop
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.80
3
+ Version: 0.1.82
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.80",
53
+ version="0.1.82",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes