geocif 0.1.81__tar.gz → 0.1.83__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. {geocif-0.1.81/geocif.egg-info → geocif-0.1.83}/PKG-INFO +1 -1
  2. {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/indices.py +1 -1
  3. {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif.py +37 -11
  4. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/embedding.py +4 -1
  5. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stages.py +5 -0
  6. {geocif-0.1.81 → geocif-0.1.83/geocif.egg-info}/PKG-INFO +1 -1
  7. {geocif-0.1.81 → geocif-0.1.83}/setup.py +1 -1
  8. {geocif-0.1.81 → geocif-0.1.83}/LICENSE +0 -0
  9. {geocif-0.1.81 → geocif-0.1.83}/MANIFEST.in +0 -0
  10. {geocif-0.1.81 → geocif-0.1.83}/README.md +0 -0
  11. {geocif-0.1.81 → geocif-0.1.83}/geocif/__init__.py +0 -0
  12. {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/__init__.py +0 -0
  13. {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/geoagmet.py +0 -0
  14. {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/plot.py +0 -0
  15. {geocif-0.1.81 → geocif-0.1.83}/geocif/agmet/utils.py +0 -0
  16. {geocif-0.1.81 → geocif-0.1.83}/geocif/analysis.py +0 -0
  17. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/__init__.py +0 -0
  18. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/constants.py +0 -0
  19. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/features.py +0 -0
  20. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geo.py +0 -0
  21. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/geocif.py +0 -0
  22. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/metadata.py +0 -0
  23. {geocif-0.1.81 → geocif-0.1.83}/geocif/backup/models.py +0 -0
  24. {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/__init__.py +0 -0
  25. {geocif-0.1.81 → geocif-0.1.83}/geocif/cei/definitions.py +0 -0
  26. {geocif-0.1.81 → geocif-0.1.83}/geocif/experiments.py +0 -0
  27. {geocif-0.1.81 → geocif-0.1.83}/geocif/geocif_runner.py +0 -0
  28. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner.py +0 -0
  29. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_angola.py +0 -0
  30. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_madagascar.py +0 -0
  31. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_malawi.py +0 -0
  32. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_mozambique.py +0 -0
  33. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_south_africa.py +0 -0
  34. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zambia.py +0 -0
  35. {geocif-0.1.81 → geocif-0.1.83}/geocif/indices_runner_zimbabwe.py +0 -0
  36. {geocif-0.1.81 → geocif-0.1.83}/geocif/logger.py +0 -0
  37. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/__init__.py +0 -0
  38. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/correlations.py +0 -0
  39. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_engineering.py +0 -0
  40. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/feature_selection.py +0 -0
  41. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outliers.py +0 -0
  42. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/outlook.py +0 -0
  43. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/output.py +0 -0
  44. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/spatial_autocorrelation.py +0 -0
  45. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/stats.py +0 -0
  46. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trainers.py +0 -0
  47. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/trend.py +0 -0
  48. {geocif-0.1.81 → geocif-0.1.83}/geocif/ml/xai.py +0 -0
  49. {geocif-0.1.81 → geocif-0.1.83}/geocif/mm.py +0 -0
  50. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/__init__.py +0 -0
  51. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/aa.py +0 -0
  52. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/area.py +0 -0
  53. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/automl.py +0 -0
  54. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/download_esi.py +0 -0
  55. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/enso.py +0 -0
  56. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/eval.py +0 -0
  57. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gamtest.py +0 -0
  58. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/gee_access.py +0 -0
  59. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/misc.py +0 -0
  60. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/play_xagg.py +0 -0
  61. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/reg.py +0 -0
  62. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/sustain.py +0 -0
  63. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/test_catboost.py +0 -0
  64. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp.py +0 -0
  65. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp2.py +0 -0
  66. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp3.py +0 -0
  67. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp4.py +0 -0
  68. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/tmp5.py +0 -0
  69. {geocif-0.1.81 → geocif-0.1.83}/geocif/playground/wolayita_maize_mask.py +0 -0
  70. {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/__init__.py +0 -0
  71. {geocif-0.1.81 → geocif-0.1.83}/geocif/risk/impact_assessment.py +0 -0
  72. {geocif-0.1.81 → geocif-0.1.83}/geocif/utils.py +0 -0
  73. {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/__init__.py +0 -0
  74. {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/gt.py +0 -0
  75. {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/plot.py +0 -0
  76. {geocif-0.1.81 → geocif-0.1.83}/geocif/viz/tmp.py +0 -0
  77. {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/SOURCES.txt +0 -0
  78. {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/dependency_links.txt +0 -0
  79. {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/not-zip-safe +0 -0
  80. {geocif-0.1.81 → geocif-0.1.83}/geocif.egg-info/top_level.txt +0 -0
  81. {geocif-0.1.81 → geocif-0.1.83}/requirements.txt +0 -0
  82. {geocif-0.1.81 → geocif-0.1.83}/setup.cfg +0 -0
  83. {geocif-0.1.81 → geocif-0.1.83}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.81
3
+ Version: 0.1.83
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -555,7 +555,7 @@ class CEIs:
555
555
  if not col:
556
556
  raise ValueError(f"Unknown method: {self.method}")
557
557
 
558
- stages = sorted(df[col].unique())
558
+ stages = df[col].unique()
559
559
  valid_stages = None
560
560
 
561
561
  if self.method == "phenological_stages":
@@ -945,11 +945,12 @@ class Geocif:
945
945
  parts = all_cei_columns[-1].split("_")
946
946
  cei = parts[0] if parts[1].isdigit() else "_".join(parts[:2])
947
947
 
948
- # For each region, find the column with the longest string in cei_column
949
- group_by = ["Region"]
950
- groups = df.groupby(group_by)
951
948
  if self.use_cumulative_features:
952
949
  frames = []
950
+ # For each region, find the column with the longest string in cei_column
951
+ group_by = ["Region"]
952
+ groups = df.groupby(group_by)
953
+
953
954
  for name, group in groups:
954
955
  # Drop columns with all NaNs
955
956
  group.dropna(axis=1, how="all", inplace=True)
@@ -1019,26 +1020,45 @@ class Geocif:
1019
1020
  # Drop those columns
1020
1021
 
1021
1022
  df = df.drop(columns=cols_to_drop)
1022
-
1023
+ from collections import Counter
1024
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1025
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1026
+ print("<0>", dupes)
1023
1027
  # Hack: If
1024
1028
  # Change column name
1025
1029
  # e.g. 'vDTR_7_6_5_4_3_2_1_37_36_35_34_33_32_31' to 'vDTR Mar 1-Oct 27'
1026
1030
  df = stages.update_feature_names(df, self.method)
1027
-
1031
+ from collections import Counter
1032
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1033
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1034
+ print("<111>", dupes)
1028
1035
  all_cei_columns = self.get_cei_column_names(df)
1029
1036
  # Fill in any missing values with 0
1030
1037
  df.loc[:, all_cei_columns].fillna(0, inplace=True)
1038
+ from collections import Counter
1039
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1040
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1041
+ print("<1>", dupes)
1031
1042
 
1032
1043
  df = fe.compute_last_year_yield(df, self.target)
1033
-
1044
+ from collections import Counter
1045
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1046
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1047
+ print("<2>", dupes)
1034
1048
  df = fe.compute_median_statistics(
1035
1049
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1036
1050
  )
1037
-
1051
+ from collections import Counter
1052
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1053
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1054
+ print("<3>", dupes)
1038
1055
  df = fe.compute_user_median_statistics(df, range(2018, 2023))
1039
1056
 
1040
1057
  df = fe.compute_user_median_statistics(df, range(2013, 2018))
1041
-
1058
+ from collections import Counter
1059
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1060
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1061
+ print("<4>", dupes)
1042
1062
  if self.median_area_as_feature:
1043
1063
  df = fe.compute_median_statistics(
1044
1064
  df, self.all_seasons_with_yield, self.number_median_years, "Area (ha)"
@@ -1053,7 +1073,10 @@ class Geocif:
1053
1073
  df = fe.compute_analogous_yield(
1054
1074
  df, self.all_seasons_with_yield, self.number_median_years, self.target
1055
1075
  )
1056
-
1076
+ from collections import Counter
1077
+ esi_cols = df.filter(like="AUC_ESI4WK").columns.tolist()
1078
+ dupes = {k: v for k, v in Counter(esi_cols).items() if v > 1}
1079
+ print("5", dupes)
1057
1080
  # Create Region_ID column based on Region column category code
1058
1081
  df["Region"] = df["Region"].astype("category")
1059
1082
  if self.cluster_strategy == "single":
@@ -1067,7 +1090,7 @@ class Geocif:
1067
1090
 
1068
1091
  # Region_ID should be type category
1069
1092
  df["Region_ID"] = df["Region_ID"].astype("category")
1070
-
1093
+ breakpoint()
1071
1094
  return df
1072
1095
 
1073
1096
  def execute(self):
@@ -1474,7 +1497,8 @@ class Geocif:
1474
1497
  assert all_files, f"No files found in {_dir_country} with {file_name}"
1475
1498
 
1476
1499
  self.df_inputs = pd.concat(
1477
- (pd.read_csv(f) for f in all_files), ignore_index=True
1500
+ (pd.read_csv(f, engine="pyarrow") for f in tqdm(all_files, desc="Reading CSVs", leave=False)),
1501
+ ignore_index=True
1478
1502
  )
1479
1503
 
1480
1504
  self.df_inputs = stats.add_statistics(
@@ -1486,7 +1510,9 @@ class Geocif:
1486
1510
  [self.target] + self.statistics_columns,
1487
1511
  self.method,
1488
1512
  )
1513
+
1489
1514
  """ Add information on starting and ending time period for each stage"""
1515
+ self.logger.info("Adding starting and ending time period for each stage")
1490
1516
  self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)
1491
1517
 
1492
1518
  self.df_inputs.to_csv(file, index=False)
@@ -32,7 +32,10 @@ def _compute_correlations(X, y):
32
32
  f_series = X[feature]
33
33
 
34
34
  # Ignore NaN values in either y or f_series
35
- mask = ~(np.isnan(y) | np.isnan(f_series))
35
+ try:
36
+ mask = ~(np.isnan(y) | np.isnan(f_series))
37
+ except:
38
+ breakpoint()
36
39
  y_filtered = y[mask]
37
40
  f_series_filtered = f_series[mask]
38
41
 
@@ -268,6 +268,11 @@ def update_feature_names(df, method):
268
268
  # Saving the result in the dictionary
269
269
  stages_info[element] = (cei, start_stage, end_stage, new_column_name)
270
270
 
271
+ # Check if any duplicates exist in the dictionary
272
+ if len(stages_info) != len(set(stages_info.values())):
273
+ breakpoint()
274
+ raise ValueError(f"Duplicate stage information found for {element}")
275
+ breakpoint()
271
276
  # For each column in df, check if it exists in stages_info, and
272
277
  # replace it with the new column name
273
278
  # Precompute the rename mapping outside the loop
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.81
3
+ Version: 0.1.83
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.81",
53
+ version="0.1.83",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes