geocif 0.2.45__tar.gz → 0.2.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. {geocif-0.2.45/geocif.egg-info → geocif-0.2.47}/PKG-INFO +1 -1
  2. geocif-0.2.47/geocif/indices_runner_algeria.py +214 -0
  3. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/aa.py +29 -14
  4. {geocif-0.2.45 → geocif-0.2.47/geocif.egg-info}/PKG-INFO +1 -1
  5. {geocif-0.2.45 → geocif-0.2.47}/geocif.egg-info/SOURCES.txt +1 -0
  6. {geocif-0.2.45 → geocif-0.2.47}/setup.py +1 -1
  7. {geocif-0.2.45 → geocif-0.2.47}/LICENSE +0 -0
  8. {geocif-0.2.45 → geocif-0.2.47}/MANIFEST.in +0 -0
  9. {geocif-0.2.45 → geocif-0.2.47}/README.md +0 -0
  10. {geocif-0.2.45 → geocif-0.2.47}/geocif/__init__.py +0 -0
  11. {geocif-0.2.45 → geocif-0.2.47}/geocif/agmet/__init__.py +0 -0
  12. {geocif-0.2.45 → geocif-0.2.47}/geocif/agmet/geoagmet.py +0 -0
  13. {geocif-0.2.45 → geocif-0.2.47}/geocif/agmet/plot.py +0 -0
  14. {geocif-0.2.45 → geocif-0.2.47}/geocif/agmet/utils.py +0 -0
  15. {geocif-0.2.45 → geocif-0.2.47}/geocif/analysis.py +0 -0
  16. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/constants.py +0 -0
  18. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/features.py +0 -0
  19. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/geo.py +0 -0
  20. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.2.45 → geocif-0.2.47}/geocif/backup/models.py +0 -0
  23. {geocif-0.2.45 → geocif-0.2.47}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.2.45 → geocif-0.2.47}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.2.45 → geocif-0.2.47}/geocif/cei/indices.py +0 -0
  26. {geocif-0.2.45 → geocif-0.2.47}/geocif/experiments.py +0 -0
  27. {geocif-0.2.45 → geocif-0.2.47}/geocif/geocif.py +0 -0
  28. {geocif-0.2.45 → geocif-0.2.47}/geocif/geocif_runner.py +0 -0
  29. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner.py +0 -0
  30. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_angola.py +0 -0
  31. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_madagascar.py +0 -0
  32. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_malawi.py +0 -0
  33. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_mozambique.py +0 -0
  34. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_south_africa.py +0 -0
  35. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_zambia.py +0 -0
  36. {geocif-0.2.45 → geocif-0.2.47}/geocif/indices_runner_zimbabwe.py +0 -0
  37. {geocif-0.2.45 → geocif-0.2.47}/geocif/logger.py +0 -0
  38. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/__init__.py +0 -0
  39. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/correlations.py +0 -0
  40. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/embedding.py +0 -0
  41. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/feature_engineering.py +0 -0
  42. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/feature_selection.py +0 -0
  43. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/outliers.py +0 -0
  44. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/outlook.py +0 -0
  45. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/output.py +0 -0
  46. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/spatial_autocorrelation.py +0 -0
  47. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/stages.py +0 -0
  48. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/stats.py +0 -0
  49. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/trainers.py +0 -0
  50. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/trend.py +0 -0
  51. {geocif-0.2.45 → geocif-0.2.47}/geocif/ml/xai.py +0 -0
  52. {geocif-0.2.45 → geocif-0.2.47}/geocif/mm.py +0 -0
  53. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/__init__.py +0 -0
  54. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/area.py +0 -0
  55. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/automl.py +0 -0
  56. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/download_esi.py +0 -0
  57. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/enso.py +0 -0
  58. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/eval.py +0 -0
  59. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/gamtest.py +0 -0
  60. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/gee_access.py +0 -0
  61. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/misc.py +0 -0
  62. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/play_xagg.py +0 -0
  63. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/reg.py +0 -0
  64. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/sustain.py +0 -0
  65. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/test_catboost.py +0 -0
  66. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/tmp.py +0 -0
  67. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/tmp2.py +0 -0
  68. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/tmp3.py +0 -0
  69. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/tmp4.py +0 -0
  70. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/tmp5.py +0 -0
  71. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/wolayita.py +0 -0
  72. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/wolayita_maize_mask.py +0 -0
  73. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/wolayita_v2.py +0 -0
  74. {geocif-0.2.45 → geocif-0.2.47}/geocif/playground/wolayita_v3.py +0 -0
  75. {geocif-0.2.45 → geocif-0.2.47}/geocif/risk/__init__.py +0 -0
  76. {geocif-0.2.45 → geocif-0.2.47}/geocif/risk/impact_assessment.py +0 -0
  77. {geocif-0.2.45 → geocif-0.2.47}/geocif/utils.py +0 -0
  78. {geocif-0.2.45 → geocif-0.2.47}/geocif/viz/__init__.py +0 -0
  79. {geocif-0.2.45 → geocif-0.2.47}/geocif/viz/gt.py +0 -0
  80. {geocif-0.2.45 → geocif-0.2.47}/geocif/viz/plot.py +0 -0
  81. {geocif-0.2.45 → geocif-0.2.47}/geocif/viz/tmp.py +0 -0
  82. {geocif-0.2.45 → geocif-0.2.47}/geocif/viz/viz_ml.py +0 -0
  83. {geocif-0.2.45 → geocif-0.2.47}/geocif.egg-info/dependency_links.txt +0 -0
  84. {geocif-0.2.45 → geocif-0.2.47}/geocif.egg-info/not-zip-safe +0 -0
  85. {geocif-0.2.45 → geocif-0.2.47}/geocif.egg-info/top_level.txt +0 -0
  86. {geocif-0.2.45 → geocif-0.2.47}/requirements.txt +0 -0
  87. {geocif-0.2.45 → geocif-0.2.47}/setup.cfg +0 -0
  88. {geocif-0.2.45 → geocif-0.2.47}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.45
3
+ Version: 0.2.47
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -0,0 +1,214 @@
1
+ import itertools
2
+ import warnings
3
+ from multiprocessing import Pool, cpu_count
4
+ from pathlib import Path
5
+
6
+ import arrow as ar
7
+ import pandas as pd
8
+ from tqdm import tqdm
9
+
10
+ warnings.filterwarnings("ignore")
11
+
12
+ from .cei import indices
13
+ from geoprepare import base
14
+
15
+ country = "algeria"
16
+
17
+
18
def remove_duplicates(lst):
    """
    Return the distinct items of ``lst`` as a new list.

    Items are kept in the order of their first occurrence, so the result is
    deterministic from run to run (a plain ``set`` round-trip is not).

    :param lst: iterable of hashable items
    :return: list containing each distinct item exactly once
    """
    # dict keys are unique and preserve insertion order
    return list(dict.fromkeys(lst))
25
+
26
+
27
def get_admin_zone(country, dg_shp):
    """
    Decide which administrative level to use for a country.

    :param country: country name; it is title-cased and spaces are replaced
        by underscores before being compared with the ``ADMIN0`` column
    :param dg_shp: table (already loaded) with ``ADMIN0`` and ``ADMIN2``
        columns
    :return: ``"admin_2"`` if the country has at least one non-null
        ``ADMIN2`` value, otherwise ``"admin_1"`` (including when the country
        has no rows at all)
    """
    country = country.title().replace(" ", "_")

    # Keep only the rows belonging to this country
    dg_country = dg_shp[dg_shp["ADMIN0"] == country]

    # Unknown country, or ADMIN2 entirely null: fall back to admin_1
    if dg_country.empty or dg_country["ADMIN2"].isna().all():
        return "admin_1"

    return "admin_2"
41
+
42
+
43
class cei_runner(base.BaseGeo):
    """
    Collect the per-country CSV files under ``base_dir`` and hand them to
    ``indices.process``, either through a process pool or serially.

    The country handled is the module-level ``country`` value.
    """

    def __init__(self, path_config_file):
        """
        :param path_config_file: configuration file path(s), forwarded
            unchanged to ``base.BaseGeo``
        """
        super().__init__(path_config_file)

        # Parse configuration files
        self.parse_config()

        self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
        import platform

        # NOTE: the output location is hard-coded per platform rather than
        # read from the configuration
        if platform.system() == "Linux":
            self.base_dir = Path(
                rf"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/{country}"
            )
        else:
            self.base_dir = Path(
                rf"D:\Users\ritvik\projects\GEOGLAM\Output\countries\{country}"
            )  # Path(self.parser.get("PATHS", "dir_crop_inputs"))
        self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")

    def collect_files(self):
        """
        1. Collect all the CSV files found recursively under ``base_dir``
        2. Exclude files from the `processed` directory if a file with the
           same name is also in a `processed_include_fall` directory
        3. Create a dataframe that contains the following columns:
            - directory: name of the directory two levels above the file
            - path: full path to file
            - filename: name of file
            - admin_zone: value returned by `get_admin_zone` for the name of
              the directory containing the file
        :return: Return the dataframe created above
        """
        import geopandas as gp

        dg_shp = gp.read_file(
            self.dir_input
            / "Global_Datasets"
            / "Regions"
            / "Shps"
            / "adm_shapefile.shp",
            engine="pyogrio",
        )

        # Gather the rows in a list and build the dataframe once; appending
        # to a dataframe row by row re-allocates it on every insertion
        rows = []
        for filepath in self.base_dir.rglob("*.csv"):
            # The directory holding the file is treated as the country name
            dir_country = filepath.parents[0].name

            # HACK: Skip korea for now, as it is giving errors
            # (checked before the admin-zone lookup so no work is wasted)
            if dir_country == "republic_of_korea":
                continue

            admin_zone = get_admin_zone(dir_country, dg_shp)

            # Name of the directory one level above the country directory
            process_type = filepath.parents[1].name

            rows.append([process_type, filepath, filepath.name, admin_zone])

        df_files = pd.DataFrame(
            rows, columns=["directory", "path", "filename", "admin_zone"]
        )

        # Exclude those rows where directory is processed and file is already
        # in processed_include_fall directory
        no_fall = df_files["directory"] == "processed"
        include_fall = df_files[df_files["directory"] == "processed_include_fall"][
            "filename"
        ]

        df_files = df_files[~(no_fall & (df_files["filename"].isin(include_fall)))]

        return df_files

    def process_combinations(self, df, method):
        """
        Create a list of tuples of the following:
            - directory: name of directory where file is located
            - path: full path to file
            - filename: name of file
            - admin_zone: administrative level for the file
            - method: passed through unchanged as the last element
        Duplicate tuples are removed with `remove_duplicates`.
        :param df: dataframe whose first four columns are the fields above
        :param method: value appended to every tuple
        :return: list of 5-element tuples
        """
        # itertuples yields plain tuples, avoiding positional indexing of a
        # string-labelled Series (row[0], ...) which pandas has deprecated
        first_four = df.iloc[:, :4].itertuples(index=False, name=None)

        combinations = [(*row, method) for row in tqdm(first_four, total=len(df))]

        return remove_duplicates(combinations)

    def main(self, method):
        """
        Build the work list and run ``indices.process`` on every entry.

        :param method: value forwarded to `process_combinations`
        :return: None
        """
        # Create a dataframe of the files to be analyzed
        df_files = self.collect_files()

        combinations = self.process_combinations(df_files, method)

        # Only keep the entries whose filename contains
        # "<country>_maize_s1" for the module-level country. Filtering before
        # the per-year expansion yields the same list with less work.
        combinations = [i for i in combinations if f"{country}_maize_s1" in i[2]]

        # Expand every entry to one work item per year from 2001 to the
        # current UTC year. The last element is the redo flag, fixed to
        # False here.
        # NOTE(review): the original comment says the analysis is always
        # redone for the current and previous year; presumably that is
        # handled inside indices.process - confirm.
        combinations = [
            (
                self.parser,
                status,
                path,
                filename,
                admin_zone,
                category,
                year,
                "ndvi",
                False,  # redo
            )
            for year in range(2001, ar.utcnow().year + 1)
            for status, path, filename, admin_zone, category in combinations
        ]

        # Honour the do_parallel setting read from the configuration instead
        # of a hard-coded switch
        if self.do_parallel:
            # Use roughly 60% of the cores, but never fewer than one worker
            # (Pool(0) raises ValueError)
            num_cpu = max(1, int(cpu_count() * 0.6))
            with Pool(num_cpu) as p:
                # Drain the iterator so that every work item is executed
                for _ in p.imap_unordered(indices.process, combinations):
                    pass
        else:
            # Serial path: use it to test without parallelization or to
            # debug with pdb
            pbar = tqdm(combinations)
            for val in pbar:
                pbar.set_description(f"Main loop {val[2]} {val[5]}")
                indices.process(val)
192
+
193
+
194
def run(path_config_files=None):
    """
    Validate the index definitions, then run `cei_runner` once per method.

    Args:
        path_config_files: configuration file path(s) passed to `cei_runner`;
            defaults to an empty list when omitted

    Returns:
        None
    """
    # Avoid a mutable default argument shared between calls
    if path_config_files is None:
        path_config_files = []

    # Check dictionary keys to have no spaces
    indices.validate_index_definitions()

    # Other method names mentioned for this runner: "dekad_r", "full_season",
    # "phenological_stages", "fraction_season"
    for method in [
        "monthly_r",
    ]:
        obj = cei_runner(path_config_files)
        obj.main(method)
211
+
212
+
213
+ if __name__ == "__main__":
214
+ run()
@@ -2,28 +2,30 @@ import geopandas as gpd
2
2
  import pandas as pd
3
3
  import matplotlib.pyplot as plt
4
4
  import matplotlib as mpl
5
-
5
+ from pathlib import Path
6
6
 
7
7
  import pandas as pd
8
8
 
9
- # Step 1: Read the CSV file
10
- input_file_path = r"C:\Users\ritvik\Downloads\observed_adjusted_detrended_woreda_yield (1).csv"
11
- df = pd.read_csv(input_file_path)
9
+ DATA_DIR = Path(r"C:\Users\ritvik\Downloads\exported_all_db")
12
10
 
13
- # Step 2: Remove rows that are completely empty (all columns are NaN)
14
- df_cleaned = df.dropna(how='all')
11
+ # Grab all maize CSV files (skip 'models.csv')
12
+ csv_paths = sorted(DATA_DIR.glob("*_maize.csv"))
15
13
 
16
- # Step 3: Write the cleaned data back to a new CSV file
17
- output_file_path = r"C:\Users\ritvik\Downloads\observed_adjusted_detrended_woreda_yield_cleaned.csv"
18
- df_cleaned.to_csv(output_file_path, index=False)
14
+ dfs = []
15
+ for fp in csv_paths:
16
+ df = pd.read_csv(fp)
17
+ # Build the 'Country Region' column
18
+ df["Country Region"] = (
19
+ df["Country"].str.strip() + "_" + df["Region"].str.strip()
20
+ ).str.replace(r"\s+", "_", regex=True).str.lower()
21
+ dfs.append(df)
19
22
 
20
- # (Optional) Look at the first few rows of the cleaned DataFrame
21
- df_cleaned.head()
23
+ # Concatenate everything
24
+ df = pd.concat(dfs, ignore_index=True)
22
25
 
23
- breakpoint()
24
26
  # --- 1. Read data ---
25
27
  dg = gpd.read_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica.shp")
26
- df = pd.read_csv(r"D:\Users\ritvik\projects\GEOGLAM\geocif_march_2025.csv")
28
+ # df = pd.read_csv(r"D:\Users\ritvik\projects\GEOGLAM\geocif_march_2025.csv")
27
29
 
28
30
  # --- 2. Create the new "Country Region" column ---
29
31
  dg['Country Region'] = (
@@ -42,6 +44,15 @@ dg['Country Region'] = (
42
44
  # --- 3. Merge shapefile with CSV ---
43
45
  merged = dg.merge(df, left_on='Country Region', right_on='Country Region', how='right')
44
46
 
47
+ # Compute '% Anomaly (2013-2017)' and '% Anomaly (2018-2022)' as (predicted - median) / median * 100
48
+ merged['% Anomaly (2013-2017)'] = (
49
+ merged["Predicted Yield (tn per ha)"] - merged['Median Yield (tn per ha) (2013-2017)']
50
+ ) / merged['Median Yield (tn per ha) (2013-2017)'] * 100
51
+ merged['% Anomaly (2018-2022)'] = (
52
+ merged["Predicted Yield (tn per ha)"] - merged['Median Yield (tn per ha) (2018-2022)']
53
+ ) / merged['Median Yield (tn per ha) (2018-2022)'] * 100
54
+
55
+
45
56
  # --- 4. Rename columns ---
46
57
  merged.rename(
47
58
  columns={
@@ -52,8 +63,12 @@ merged.rename(
52
63
  )
53
64
 
54
65
  # Optional: Write out merged shapefile
55
- merged.to_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica_geocif_march_2025.shp")
66
+ merged.to_file(r"D:\Users\ritvik\projects\GEOGLAM\safrica_geocif_may_2025.shp")
67
+
68
+ # Output to CSV and exclude geometry
69
+ merged.drop(columns='geometry').to_csv(r"D:\Users\ritvik\projects\GEOGLAM\safrica_geocif_may_2025.csv", index=False)
56
70
 
71
+ breakpoint()
57
72
  # --- 5. Plot ---
58
73
  fig, ax = plt.subplots(1, 2, figsize=(20, 10))
59
74
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.2.45
3
+ Version: 0.2.47
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -10,6 +10,7 @@ geocif/experiments.py
10
10
  geocif/geocif.py
11
11
  geocif/geocif_runner.py
12
12
  geocif/indices_runner.py
13
+ geocif/indices_runner_algeria.py
13
14
  geocif/indices_runner_angola.py
14
15
  geocif/indices_runner_madagascar.py
15
16
  geocif/indices_runner_malawi.py
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.2.45",
53
+ version="0.2.47",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes