geocif 0.2.67__tar.gz → 0.2.69__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. {geocif-0.2.67/geocif.egg-info → geocif-0.2.69}/PKG-INFO +13 -2
  2. {geocif-0.2.67 → geocif-0.2.69}/geocif/geocif.py +3 -1
  3. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/feature_selection.py +24 -5
  4. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/stages.py +10 -7
  5. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/stats.py +5 -5
  6. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/aa.py +1 -0
  7. {geocif-0.2.67 → geocif-0.2.69/geocif.egg-info}/PKG-INFO +13 -2
  8. {geocif-0.2.67 → geocif-0.2.69}/setup.py +1 -1
  9. {geocif-0.2.67 → geocif-0.2.69}/LICENSE +0 -0
  10. {geocif-0.2.67 → geocif-0.2.69}/MANIFEST.in +0 -0
  11. {geocif-0.2.67 → geocif-0.2.69}/README.md +0 -0
  12. {geocif-0.2.67 → geocif-0.2.69}/geocif/__init__.py +0 -0
  13. {geocif-0.2.67 → geocif-0.2.69}/geocif/agmet/__init__.py +0 -0
  14. {geocif-0.2.67 → geocif-0.2.69}/geocif/agmet/geoagmet.py +0 -0
  15. {geocif-0.2.67 → geocif-0.2.69}/geocif/agmet/plot.py +0 -0
  16. {geocif-0.2.67 → geocif-0.2.69}/geocif/agmet/utils.py +0 -0
  17. {geocif-0.2.67 → geocif-0.2.69}/geocif/analysis.py +0 -0
  18. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/__init__.py +0 -0
  19. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/constants.py +0 -0
  20. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/features.py +0 -0
  21. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/geo.py +0 -0
  22. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/geocif.py +0 -0
  23. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/metadata.py +0 -0
  24. {geocif-0.2.67 → geocif-0.2.69}/geocif/backup/models.py +0 -0
  25. {geocif-0.2.67 → geocif-0.2.69}/geocif/cei/__init__.py +0 -0
  26. {geocif-0.2.67 → geocif-0.2.69}/geocif/cei/definitions.py +0 -0
  27. {geocif-0.2.67 → geocif-0.2.69}/geocif/cei/indices.py +0 -0
  28. {geocif-0.2.67 → geocif-0.2.69}/geocif/experiments.py +0 -0
  29. {geocif-0.2.67 → geocif-0.2.69}/geocif/geocif_runner.py +0 -0
  30. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner.py +0 -0
  31. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_algeria.py +0 -0
  32. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_angola.py +0 -0
  33. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_madagascar.py +0 -0
  34. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_malawi.py +0 -0
  35. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_mozambique.py +0 -0
  36. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_south_africa.py +0 -0
  37. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_zambia.py +0 -0
  38. {geocif-0.2.67 → geocif-0.2.69}/geocif/indices_runner_zimbabwe.py +0 -0
  39. {geocif-0.2.67 → geocif-0.2.69}/geocif/logger.py +0 -0
  40. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/__init__.py +0 -0
  41. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/correlations.py +0 -0
  42. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/embedding.py +0 -0
  43. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/feature_engineering.py +0 -0
  44. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/outliers.py +0 -0
  45. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/outlook.py +0 -0
  46. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/output.py +0 -0
  47. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/spatial_autocorrelation.py +0 -0
  48. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/trainers.py +0 -0
  49. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/trend.py +0 -0
  50. {geocif-0.2.67 → geocif-0.2.69}/geocif/ml/xai.py +0 -0
  51. {geocif-0.2.67 → geocif-0.2.69}/geocif/mm.py +0 -0
  52. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/__init__.py +0 -0
  53. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/aaaa.py +0 -0
  54. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/area.py +0 -0
  55. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/automl.py +0 -0
  56. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/download_esi.py +0 -0
  57. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/enso.py +0 -0
  58. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/eval.py +0 -0
  59. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/gamtest.py +0 -0
  60. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/gee_access.py +0 -0
  61. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/misc.py +0 -0
  62. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/play_xagg.py +0 -0
  63. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/reg.py +0 -0
  64. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/sustain.py +0 -0
  65. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/test_catboost.py +0 -0
  66. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/tmp.py +0 -0
  67. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/tmp2.py +0 -0
  68. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/tmp3.py +0 -0
  69. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/tmp4.py +0 -0
  70. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/tmp5.py +0 -0
  71. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/wolayita.py +0 -0
  72. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/wolayita_maize_mask.py +0 -0
  73. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/wolayita_v2.py +0 -0
  74. {geocif-0.2.67 → geocif-0.2.69}/geocif/playground/wolayita_v3.py +0 -0
  75. {geocif-0.2.67 → geocif-0.2.69}/geocif/risk/__init__.py +0 -0
  76. {geocif-0.2.67 → geocif-0.2.69}/geocif/risk/impact_assessment.py +0 -0
  77. {geocif-0.2.67 → geocif-0.2.69}/geocif/utils.py +0 -0
  78. {geocif-0.2.67 → geocif-0.2.69}/geocif/viz/__init__.py +0 -0
  79. {geocif-0.2.67 → geocif-0.2.69}/geocif/viz/gt.py +0 -0
  80. {geocif-0.2.67 → geocif-0.2.69}/geocif/viz/plot.py +0 -0
  81. {geocif-0.2.67 → geocif-0.2.69}/geocif/viz/tmp.py +0 -0
  82. {geocif-0.2.67 → geocif-0.2.69}/geocif/viz/viz_ml.py +0 -0
  83. {geocif-0.2.67 → geocif-0.2.69}/geocif.egg-info/SOURCES.txt +0 -0
  84. {geocif-0.2.67 → geocif-0.2.69}/geocif.egg-info/dependency_links.txt +0 -0
  85. {geocif-0.2.67 → geocif-0.2.69}/geocif.egg-info/not-zip-safe +0 -0
  86. {geocif-0.2.67 → geocif-0.2.69}/geocif.egg-info/top_level.txt +0 -0
  87. {geocif-0.2.67 → geocif-0.2.69}/requirements.txt +0 -0
  88. {geocif-0.2.67 → geocif-0.2.69}/setup.cfg +0 -0
  89. {geocif-0.2.67 → geocif-0.2.69}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: geocif
3
- Version: 0.2.67
3
+ Version: 0.2.69
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -16,6 +16,17 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Dynamic: author
20
+ Dynamic: author-email
21
+ Dynamic: classifier
22
+ Dynamic: description
23
+ Dynamic: description-content-type
24
+ Dynamic: home-page
25
+ Dynamic: keywords
26
+ Dynamic: license
27
+ Dynamic: license-file
28
+ Dynamic: requires-python
29
+ Dynamic: summary
19
30
 
20
31
  # geocif
21
32
 
@@ -836,12 +836,13 @@ class Geocif:
836
836
 
837
837
  """ Feature selection and then Train """
838
838
  # Filter dataframe based on region and self.feature_names
839
+ breakpoint()
839
840
  df_region_train = self.df_train[mask_train]
840
841
  df_region_train = df_region_train[self.fixed_columns + common_columns]
841
842
  df_region_train.reset_index(drop=True, inplace=True)
842
843
  df_region_train = df_region_train.dropna(subset=[self.target_column])
843
844
 
844
- self.X_train = df_region_train[self.feature_names]
845
+ self.X_train = df_region_train[self.feature_names + ["Region"]]
845
846
 
846
847
  # Drop any columns with NaNs except the lag yield columns
847
848
  lag_prefix = "t -"
@@ -1543,6 +1544,7 @@ class Geocif:
1543
1544
  self.logger.info("Adding starting and ending time period for each stage")
1544
1545
  self.df_inputs = stages.add_stage_information(self.df_inputs, self.method)
1545
1546
 
1547
+ self.logger.info("Writing input file to disk")
1546
1548
  self.df_inputs.to_csv(file, index=False)
1547
1549
  else:
1548
1550
  self.df_inputs = pd.read_csv(file)
@@ -65,7 +65,6 @@ def select_features(
65
65
  X_filtered : pd.DataFrame of selected features
66
66
  selected_features : list[str]
67
67
  """
68
-
69
68
  # copy original for multi-mode recursion
70
69
  X_clean = X.copy()
71
70
 
@@ -250,10 +249,30 @@ def select_features(
250
249
 
251
250
  elif method == "BorutaPy":
252
251
  from boruta import BorutaPy
253
- sel = BorutaPy(forest, n_estimators="auto", random_state=42, verbose=0)
254
- sel.fit(X_clean.values, y)
255
- mask = sel.support_ | sel.support_weak_
256
- selected = X_clean.columns[mask].tolist()
252
+ from collections import Counter
253
+ import itertools as it
254
+
255
+ region_selected = {} # {region: [features …]}
256
+ for region in tqdm(X_clean["Region"].unique(), desc="BorutaPy", leave=False):
257
+ idx = X_clean["Region"] == region
258
+ X_region = X_clean.loc[idx].drop(columns=["Region"])
259
+ y_region = y.loc[idx] if hasattr(y, "loc") else y[idx]
260
+
261
+ sel = BorutaPy(
262
+ estimator=forest,
263
+ n_estimators="auto",
264
+ random_state=42,
265
+ verbose=0
266
+ )
267
+ sel.fit(X_region.values, y_region.values)
268
+
269
+ region_selected[region] = (
270
+ X_region.columns[sel.support_ | sel.support_weak_].tolist()
271
+ )
272
+
273
+ # ─── 3. keep features chosen in ≥ 2 regions ------------------------------
274
+ counts = Counter(it.chain.from_iterable(region_selected.values()))
275
+ selected = [feat for feat, n in counts.items() if n >= 2]
257
276
 
258
277
  elif method == "Leshy":
259
278
  import arfs.feature_selection.allrelevant as arfsgroot
@@ -1,5 +1,6 @@
1
1
  import numpy as np
2
2
  from typing import Union
3
+ from tqdm import tqdm
3
4
 
4
5
  from geocif import utils
5
6
 
@@ -46,14 +47,16 @@ def add_stage_information(df, method):
46
47
  df["Starting Stage"].map(dict) + " - " + df["Ending Stage"].map(dict)
47
48
  )
48
49
 
49
- # Group by Region, Harvest Year
50
- # For each group, add a column called Percentage Season
51
- # that is the percentage of the season that has passed based on the number of rows
52
- # in the group
50
+ df["Percentage Season"] = float("nan")
51
+
52
+ # Group by Region and Harvest Year
53
53
  grouped = df.groupby(["Region", "Harvest Year"])
54
- df["Percentage Season"] = (
55
- grouped.cumcount() * 100.0 / grouped["CEI"].transform("size")
56
- )
54
+
55
+ # Loop through groups with tqdm
56
+ for (region, year), group in tqdm(grouped, desc="Computing Percentage Season"):
57
+ idx = group.index
58
+ n = len(group)
59
+ df.loc[idx, "Percentage Season"] = [i * 100.0 / n for i in range(n)]
57
60
 
58
61
  return df
59
62
 
@@ -18,7 +18,7 @@ def get_yld_prd(df, name_crop, cntr, region, calendar_year, region_column="ADM1_
18
18
 
19
19
  """
20
20
  # Get yield and production for country for specific year
21
- val = np.NaN
21
+ val = np.nan
22
22
 
23
23
  # df.columns.values: [u'ADM0_NAME', u'ADM1_NAME', u'ADM2_NAME', u'str_ID', u'num_ID', 1990 ... 2015]
24
24
  if calendar_year in df.columns:
@@ -88,7 +88,7 @@ def get_yld_prd(df, name_crop, cntr, region, calendar_year, region_column="ADM1_
88
88
 
89
89
  try:
90
90
  if val.isnull().all():
91
- val = np.NaN
91
+ val = np.nan
92
92
  else:
93
93
  val = val.values[0]
94
94
  except:
@@ -97,10 +97,10 @@ def get_yld_prd(df, name_crop, cntr, region, calendar_year, region_column="ADM1_
97
97
  else:
98
98
  # The values[-1] is a hack to accommodate multiple types of green maize
99
99
  vals = df[calendar_year]
100
- val = vals.values[-1] if not vals.empty else np.NaN
100
+ val = vals.values[-1] if not vals.empty else np.nan
101
101
 
102
102
  # Replace yield/production value of 0 with NaN
103
- val = np.NaN if val == 0.0 else val
103
+ val = np.nan if val == 0.0 else val
104
104
 
105
105
  return val
106
106
 
@@ -120,7 +120,7 @@ def add_GEOGLAM_statistics(dir_stats, df, stats, method, admin_zone):
120
120
  """
121
121
  # Create empty columns for all the ag statistics
122
122
  for stat in stats:
123
- df.loc[:, stat] = np.NaN
123
+ df.loc[:, stat] = np.nan
124
124
 
125
125
  # Fill in the ag statistics columns with data when available
126
126
  # Compute national scale statistics
@@ -1,3 +1,4 @@
1
+
1
2
  from graphviz import Source
2
3
 
3
4
  # Read your dot file
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: geocif
3
- Version: 0.2.67
3
+ Version: 0.2.69
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -16,6 +16,17 @@ Classifier: Programming Language :: Python :: 3.9
16
16
  Requires-Python: >=3.9
17
17
  Description-Content-Type: text/markdown
18
18
  License-File: LICENSE
19
+ Dynamic: author
20
+ Dynamic: author-email
21
+ Dynamic: classifier
22
+ Dynamic: description
23
+ Dynamic: description-content-type
24
+ Dynamic: home-page
25
+ Dynamic: keywords
26
+ Dynamic: license
27
+ Dynamic: license-file
28
+ Dynamic: requires-python
29
+ Dynamic: summary
19
30
 
20
31
  # geocif
21
32
 
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.2.67",
53
+ version="0.2.69",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes