geocif 0.1.51__tar.gz → 0.1.52__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. {geocif-0.1.51/geocif.egg-info → geocif-0.1.52}/PKG-INFO +1 -1
  2. {geocif-0.1.51 → geocif-0.1.52}/geocif/analysis.py +221 -110
  3. {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner_v2.py +2 -1
  4. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/feature_selection.py +1 -0
  5. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/aa.py +50 -0
  6. {geocif-0.1.51 → geocif-0.1.52/geocif.egg-info}/PKG-INFO +1 -1
  7. {geocif-0.1.51 → geocif-0.1.52}/setup.py +1 -1
  8. {geocif-0.1.51 → geocif-0.1.52}/LICENSE +0 -0
  9. {geocif-0.1.51 → geocif-0.1.52}/MANIFEST.in +0 -0
  10. {geocif-0.1.51 → geocif-0.1.52}/README.md +0 -0
  11. {geocif-0.1.51 → geocif-0.1.52}/geocif/__init__.py +0 -0
  12. {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/__init__.py +0 -0
  13. {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/geoagmet.py +0 -0
  14. {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/plot.py +0 -0
  15. {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/utils.py +0 -0
  16. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/__init__.py +0 -0
  17. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/constants.py +0 -0
  18. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/features.py +0 -0
  19. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/geo.py +0 -0
  20. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/geocif.py +0 -0
  21. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/metadata.py +0 -0
  22. {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/models.py +0 -0
  23. {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/__init__.py +0 -0
  24. {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/definitions.py +0 -0
  25. {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/indices.py +0 -0
  26. {geocif-0.1.51 → geocif-0.1.52}/geocif/experiments.py +0 -0
  27. {geocif-0.1.51 → geocif-0.1.52}/geocif/geocif.py +0 -0
  28. {geocif-0.1.51 → geocif-0.1.52}/geocif/geocif_runner.py +0 -0
  29. {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner.py +0 -0
  30. {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner_v3.py +0 -0
  31. {geocif-0.1.51 → geocif-0.1.52}/geocif/logger.py +0 -0
  32. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/__init__.py +0 -0
  33. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/correlations.py +0 -0
  34. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/embedding.py +0 -0
  35. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/feature_engineering.py +0 -0
  36. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/outliers.py +0 -0
  37. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/outlook.py +0 -0
  38. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/output.py +0 -0
  39. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/spatial_autocorrelation.py +0 -0
  40. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/stages.py +0 -0
  41. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/stats.py +0 -0
  42. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/trainers.py +0 -0
  43. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/trend.py +0 -0
  44. {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/xai.py +0 -0
  45. {geocif-0.1.51 → geocif-0.1.52}/geocif/mm.py +0 -0
  46. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/__init__.py +0 -0
  47. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/automl.py +0 -0
  48. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/download_esi.py +0 -0
  49. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/enso.py +0 -0
  50. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/gamtest.py +0 -0
  51. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/misc.py +0 -0
  52. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/sustain.py +0 -0
  53. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/test_catboost.py +0 -0
  54. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp.py +0 -0
  55. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp2.py +0 -0
  56. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp3.py +0 -0
  57. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp4.py +0 -0
  58. {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp5.py +0 -0
  59. {geocif-0.1.51 → geocif-0.1.52}/geocif/risk/__init__.py +0 -0
  60. {geocif-0.1.51 → geocif-0.1.52}/geocif/risk/impact_assessment.py +0 -0
  61. {geocif-0.1.51 → geocif-0.1.52}/geocif/utils.py +0 -0
  62. {geocif-0.1.51 → geocif-0.1.52}/geocif/viz/__init__.py +0 -0
  63. {geocif-0.1.51 → geocif-0.1.52}/geocif/viz/plot.py +0 -0
  64. {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/SOURCES.txt +0 -0
  65. {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/dependency_links.txt +0 -0
  66. {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/not-zip-safe +0 -0
  67. {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/top_level.txt +0 -0
  68. {geocif-0.1.51 → geocif-0.1.52}/requirements.txt +0 -0
  69. {geocif-0.1.51 → geocif-0.1.52}/setup.cfg +0 -0
  70. {geocif-0.1.51 → geocif-0.1.52}/tests/test_geocif.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.51
3
+ Version: 0.1.52
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -165,7 +165,6 @@ class Geoanalysis:
165
165
  df_metrics = self._process_metrics(df_metrics)
166
166
 
167
167
  self._plot_metrics(df_metrics)
168
-
169
168
  df_regional_metrics_by_year = self._compute_regional_metrics(
170
169
  df, by="Harvest Year"
171
170
  )
@@ -177,9 +176,9 @@ class Geoanalysis:
177
176
  self._store_results(
178
177
  df_metrics, df_regional_metrics, df_regional_metrics_by_year
179
178
  )
180
-
181
179
  df_national_yield = self._compute_national_yield(df)
182
180
  self._plot_national_yield(df_national_yield)
181
+ self._plot_regional_yield_scatter(df)
183
182
 
184
183
  return df_metrics, df_regional_metrics, df_national_yield
185
184
 
@@ -250,12 +249,15 @@ class Geoanalysis:
250
249
  else:
251
250
  return df.groupby(cols).apply(self.regional_metrics).reset_index()
252
251
 
253
- def _select_top_years(self, df_regional_metrics):
254
- return (
255
- df_regional_metrics.groupby(["Country", "Region"])
256
- .apply(lambda x: self.select_top_N_years(x, 10))
257
- .reset_index(drop=True)
258
- )
252
+ def _select_top_years(self, df_regional_metrics, top_N=-1):
253
+ if top_N == -1:
254
+ return df_regional_metrics
255
+ else:
256
+ return (
257
+ df_regional_metrics.groupby(["Country", "Region"])
258
+ .apply(lambda x: self.select_top_N_years(x, 10))
259
+ .reset_index(drop=True)
260
+ )
259
261
 
260
262
  def _average_mape(self, df_regional_metrics):
261
263
  cols = [
@@ -338,18 +340,29 @@ class Geoanalysis:
338
340
 
339
341
  con.commit()
340
342
 
341
- def _compute_national_yield(self, df):
342
- # Compute observed and predicted national yield by multiplying Yield (tn per ha) by Area (ha)
343
+ def _compute_national_yield(self, df_region):
344
+ # Define column names
343
345
  observed = "Observed Yield (tn per ha)"
344
346
  predicted = "Predicted Yield (tn per ha)"
345
347
  area_ha = "Area (ha)"
346
348
 
347
- df.loc[:, observed] = df[observed] * df[area_ha]
348
- df.loc[:, predicted] = df[predicted] * df[area_ha]
349
+ df_tmp = df_region.copy()
350
+
351
+ # Fill
352
+ df_tmp[area_ha] = df_tmp.groupby("Country")[area_ha].transform(lambda x: x.fillna(x.median()))
353
+
354
+ # Log that we are filling missing values with the median
355
+ self.logger.info(
356
+ f"Filling missing values in {area_ha} with the median for each country"
357
+ )
358
+
359
+ # Compute observed and predicted national yield by multiplying Yield (tn per ha) by Area (ha)
360
+ df_tmp[observed] = df_tmp[observed] * df_tmp[area_ha]
361
+ df_tmp[predicted] = df_tmp[predicted] * df_tmp[area_ha]
349
362
 
350
363
  # Group by Country and Harvest Year, then sum the National Yield and Area
351
364
  df_national_yield = (
352
- df.groupby(["Country", "Harvest Year"])
365
+ df_tmp.groupby(["Country", "Harvest Year"])
353
366
  .agg({observed: "sum", predicted: "sum", area_ha: "sum"})
354
367
  .reset_index()
355
368
  )
@@ -364,53 +377,116 @@ class Geoanalysis:
364
377
 
365
378
  return df_national_yield
366
379
 
367
- def _plot_national_yield(self, df_national_yield, use_different_colors=True):
368
- from sklearn.metrics import (
369
- mean_squared_error,
370
- r2_score,
371
- mean_absolute_percentage_error,
372
- )
380
+ def _plot_regional_yield_scatter(self, df):
381
+ """
382
+ Plot observed vs predicted yield for all regions and all years.
383
+ """
384
+ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
373
385
 
374
- x = df_national_yield["Harvest Year"]
375
- y_observed = df_national_yield["Observed Yield (tn per ha)"]
376
- y_predicted = df_national_yield["Predicted Yield (tn per ha)"]
386
+ # Ensure 'Harvest Year' is numeric
387
+ df["Harvest Year"] = pd.to_numeric(df["Harvest Year"], errors="coerce")
377
388
 
389
+ # Extract data
390
+ y_observed = df["Observed Yield (tn per ha)"]
391
+ y_predicted = df["Predicted Yield (tn per ha)"]
392
+ years = df["Harvest Year"]
393
+
394
+ # Generate colors for years
395
+ cmap = plt.cm.viridis # Colormap for years
396
+ norm = plt.Normalize(vmin=years.min(), vmax=years.max()) # Normalize years to colormap
397
+ colors = [cmap(norm(year)) for year in years]
398
+
399
+ # Create the plot
378
400
  with plt.style.context("science"):
379
- plt.figure(figsize=(10, 6))
401
+ fig, ax = plt.subplots(figsize=(10, 6))
402
+
403
+ # Add gridlines
404
+ ax.grid(True, linestyle="--", alpha=0.5)
405
+
406
+ # Scatter plot with colors representing years
407
+ scatter = ax.scatter(y_observed, y_predicted, color=colors, s=50)
380
408
 
381
- import palettable as pal
409
+ # Add 1:1 diagonal line
410
+ max_yield = max(y_observed.max(), y_predicted.max()) * 1.25
411
+ ax.plot([0, max_yield], [0, max_yield], color="gray", linestyle="--")
412
+
413
+ # Calculate and display metrics
414
+ rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
415
+ mape = mean_absolute_percentage_error(y_observed, y_predicted)
416
+ r2 = r2_score(y_observed, y_predicted)
417
+
418
+ textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\n$r^2$: {r2:.2f}"
419
+ ax.annotate(
420
+ textstr,
421
+ xy=(0.05, 0.95),
422
+ xycoords="axes fraction",
423
+ fontsize=12,
424
+ verticalalignment="top",
425
+ )
382
426
 
383
- colors = pal.tableau.Tableau_20.mpl_colors
384
- colors = colors[: len(x)]
427
+ # Set axis limits and labels
428
+ ax.set_xlabel("Observed Yield (tn/ha)")
429
+ ax.set_ylabel("Predicted Yield (tn/ha)")
430
+ ax.set_xlim(0, max_yield)
431
+ ax.set_ylim(0, max_yield)
385
432
 
386
- # Add dashed gray grid lines with alpha=0.5
387
- plt.grid(True, linestyle="--", alpha=0.5)
433
+ # Add colorbar for years
434
+ sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
435
+ sm.set_array([])
436
+ cbar = fig.colorbar(sm, ax=ax, aspect=50, pad=0.02)
437
+ cbar.set_label("Harvest Year")
388
438
 
389
- for i in range(len(x)):
390
- plt.scatter(y_observed[i], y_predicted[i], color=colors[i], label=x[i])
439
+ # Set equispaced ticks for exactly 5 points
440
+ ticks = np.linspace(years.min(), years.max(), 5, dtype=int) # 5 equispaced ticks
441
+ cbar.set_ticks(ticks)
442
+ cbar.ax.set_yticklabels([str(tick) for tick in ticks])
443
+
444
+ plt.tight_layout()
445
+
446
+ # Save the plot
447
+ fname = f"scatter_all_regions_{self.country}_{self.crop}.png"
448
+ plt.savefig(self.dir_analysis / fname, dpi=250)
449
+ plt.close()
450
+
451
+ def _plot_national_yield(self, df_national_yield):
452
+ from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
453
+
454
+ # Ensure 'Harvest Year' is numeric
455
+ df_national_yield["Harvest Year"] = pd.to_numeric(df_national_yield["Harvest Year"], errors="coerce")
456
+
457
+ # Extract data
458
+ x = df_national_yield["Harvest Year"]
459
+ y_observed = df_national_yield["Observed Yield (tn per ha)"]
460
+ y_predicted = df_national_yield["Predicted Yield (tn per ha)"]
461
+
462
+ # Generate colors for years
463
+ cmap = plt.cm.viridis # Colormap for years
464
+ norm = plt.Normalize(vmin=x.min(), vmax=x.max()) # Normalize years to colormap
465
+ colors = [cmap(norm(year)) for year in x]
466
+
467
+ # Create the plot
468
+ with plt.style.context("science"):
469
+ fig, ax = plt.subplots(figsize=(10, 6)) # Explicitly define axes
391
470
 
392
- # X and Y-axis range from 0 to the maximum observed/predicted yield * 1.1
393
471
  max_yield = max(y_observed.max(), y_predicted.max()) * 1.25
394
- plt.xlim(0, max_yield)
395
- plt.ylim(0, max_yield)
396
472
 
397
- # Add a line diagonally representing 1:1
398
- plt.plot([0, max_yield], [0, max_yield], color="gray", linestyle="--")
473
+ # Add gridlines
474
+ ax.grid(True, linestyle="--", alpha=0.5)
399
475
 
400
- # Calculate metrics
476
+ # Scatter plot with uniform size and dynamic colors
477
+ for year, obs, pred, color in zip(x, y_observed, y_predicted, colors):
478
+ ax.scatter(obs, pred, color=color, s=50, label=year)
479
+
480
+ # Add 1:1 diagonal line
481
+ ax.plot([0, max_yield], [0, max_yield], color="gray", linestyle="--")
482
+
483
+ # Calculate and display metrics
401
484
  rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
402
485
  mape = mean_absolute_percentage_error(y_observed, y_predicted)
403
486
  r2 = r2_score(y_observed, y_predicted)
404
487
 
405
- # Annotate metrics
406
- textstr = "\n".join(
407
- (
408
- f"RMSE: {rmse:.2f} tn/ha",
409
- f"MAPE: {mape:.2%}",
410
- f"R²: {r2:.2f}",
411
- )
412
- )
413
- plt.gca().annotate(
488
+ textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\nr²: {r2:.2f}"
489
+ ax.annotate(
414
490
  textstr,
415
491
  xy=(0.05, 0.95),
416
492
  xycoords="axes fraction",
@@ -418,17 +494,26 @@ class Geoanalysis:
418
494
  verticalalignment="top",
419
495
  )
420
496
 
421
- plt.xlabel("Observed Yield (tn/ha)")
422
- plt.ylabel("Predicted Yield (tn/ha)")
423
- # Place legend outside the plot to the right without a border
424
- plt.legend(
425
- title="Year",
426
- bbox_to_anchor=(1.05, 1),
427
- loc="upper left",
428
- edgecolor="none",
429
- )
497
+ # Set axis limits and labels
498
+ ax.set_xlabel("Observed Yield (tn/ha)")
499
+ ax.set_ylabel("Predicted Yield (tn/ha)")
500
+ ax.set_xlim(0, max_yield)
501
+ ax.set_ylim(0, max_yield)
502
+
503
+ # Add legend for years
504
+ sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
505
+ sm.set_array([])
506
+ cbar = fig.colorbar(sm, ax=ax, aspect=50, pad=0.02) # Specify the axis explicitly
507
+ cbar.set_label("Harvest Year")
508
+
509
+ # Set equispaced ticks for exactly 5 points
510
+ ticks = np.linspace(x.min(), x.max(), 5, dtype=int) # 5 equispaced ticks
511
+ cbar.set_ticks(ticks)
512
+ cbar.ax.set_yticklabels([str(tick) for tick in ticks])
513
+
430
514
  plt.tight_layout()
431
515
 
516
+ # Save the plot
432
517
  fname = f"scatter_{self.country}_{self.crop}.png"
433
518
  plt.savefig(self.dir_analysis / fname, dpi=250)
434
519
  plt.close()
@@ -535,7 +620,11 @@ class Geoanalysis:
535
620
  "ADM1_NAME" if self.admin_zone == "admin_1" else "ADM2_NAME"
536
621
  )
537
622
  analysis_years = df_model["Harvest Year"].unique()
538
- for idx, year in enumerate(tqdm(analysis_years, desc="Map")):
623
+ pbar = tqdm(analysis_years, leave=False)
624
+ for idx, year in enumerate(pbar):
625
+ pbar.set_description(f"Map {year}")
626
+ pbar.update()
627
+
539
628
  df_harvest_year = df_model[df_model["Harvest Year"] == year]
540
629
 
541
630
  for time_period in tqdm(
@@ -563,12 +652,12 @@ class Geoanalysis:
563
652
  cmap=pal.scientific.sequential.Bamako_20_r,
564
653
  series="sequential",
565
654
  show_bg=False,
566
- annotate_regions=False,
655
+ annotate_regions=True,
567
656
  annotate_region_column=annotate_region_column,
568
657
  loc_legend="lower left",
569
658
  )
570
659
  #
571
- # # """ Unique regions """
660
+ """ Unique regions """
572
661
  fname = f"{self.country}_{self.crop}_region_ID.png"
573
662
  col = "Region_ID"
574
663
  df_model[col] = df_model[col].astype(int) + 1
@@ -578,27 +667,28 @@ class Geoanalysis:
578
667
  int(key): key
579
668
  for key in df_time_period["Region_ID"].unique()
580
669
  }
581
- plot.plot_df_shpfile(
582
- self.dg, # dataframe containing adm1 name and polygon
583
- df_model, # dataframe containing information that will be mapped
584
- dict_lup=dict_region,
585
- merge_col="Country Region", # Column on which to merge
586
- name_country=countries, # Plot global map
587
- name_col=col, # Which column to plot
588
- dir_out=self.dir_plot / str(year), # Output directory
589
- fname=fname, # Output file name
590
- label=f"Region Cluster\n{self.crop.title()}",
591
- vmin=df_model[col].min(),
592
- vmax=df_model[col].max(),
593
- cmap=pal.tableau.Tableau_20.mpl_colors,
594
- series="qualitative",
595
- show_bg=False,
596
- alpha_feature=1,
597
- use_key=True,
598
- annotate_regions=False,
599
- annotate_region_column=annotate_region_column,
600
- loc_legend="lower left",
601
- )
670
+
671
+ # plot.plot_df_shpfile(
672
+ # self.dg, # dataframe containing adm1 name and polygon
673
+ # df_model, # dataframe containing information that will be mapped
674
+ # dict_lup=dict_region,
675
+ # merge_col="Country Region", # Column on which to merge
676
+ # name_country=countries, # Plot global map
677
+ # name_col=col, # Which column to plot
678
+ # dir_out=self.dir_plot / str(year), # Output directory
679
+ # fname=fname, # Output file name
680
+ # label=f"Region Cluster\n{self.crop.title()}",
681
+ # vmin=df_model[col].min(),
682
+ # vmax=df_model[col].max(),
683
+ # cmap=pal.tableau.Tableau_20.mpl_colors,
684
+ # series="qualitative",
685
+ # show_bg=False,
686
+ # alpha_feature=1,
687
+ # use_key=True,
688
+ # annotate_regions=True,
689
+ # annotate_region_column=annotate_region_column,
690
+ # loc_legend="lower left",
691
+ # )
602
692
  # breakpoint()
603
693
 
604
694
  # """ Anomaly """
@@ -619,7 +709,7 @@ class Geoanalysis:
619
709
  # cmap=pal.cartocolors.diverging.Geyser_5_r,
620
710
  # series="sequential",
621
711
  # show_bg=False,
622
- # annotate_regions=False,
712
+ # annotate_regions=True,
623
713
  # annotate_region_column=annotate_region_column,
624
714
  # loc_legend="lower left",
625
715
  # )
@@ -640,7 +730,7 @@ class Geoanalysis:
640
730
  cmap=pal.scientific.sequential.Bamako_20_r,
641
731
  series="sequential",
642
732
  show_bg=False,
643
- annotate_regions=False,
733
+ annotate_regions=True,
644
734
  annotate_region_column=annotate_region_column,
645
735
  loc_legend="lower left",
646
736
  )
@@ -661,32 +751,32 @@ class Geoanalysis:
661
751
  # cmap=pal.scientific.sequential.Bamako_20_r,
662
752
  # series="sequential",
663
753
  # show_bg=False,
664
- # annotate_regions=False,
754
+ # annotate_regions=True,
665
755
  # annotate_region_column=annotate_region_column,
666
756
  # loc_legend="lower left",
667
757
  # )
668
758
 
669
759
  # Area
670
760
  # breakpoint()
671
- # if df_time_period["Area (ha)"].notna().all():
672
- # fname = f"{self.country}_{self.crop}_{year}_area.png"
673
- # plot.plot_df_shpfile(
674
- # self.dg, # dataframe containing adm1 name and polygon
675
- # df_time_period, # dataframe containing information that will be mapped
676
- # merge_col="Country Region", # Column on which to merge
677
- # name_country=country, # Plot global map
678
- # name_col="Area (ha)", # Which column to plot
679
- # dir_out=self.plot_dir / str(year), # Output directory
680
- # fname=fname, # Output file name
681
- # label=f"{self.predicted}\n{self.crop.title()}, {time_period}",
682
- # vmin=df_time_period[self.predicted].min(),
683
- # vmax=df_time_period[self.predicted].max(),
684
- # cmap=pal.scientific.sequential.Bamako_20_r,
685
- # series="sequential",
686
- # show_bg=False,
687
- # annotate_regions=True,
688
- # loc_legend="lower left",
689
- # )
761
+ if df_time_period["Area (ha)"].notna().all():
762
+ fname = f"{self.country}_{self.crop}_{year}_area.png"
763
+ plot.plot_df_shpfile(
764
+ self.dg, # dataframe containing adm1 name and polygon
765
+ df_time_period, # dataframe containing information that will be mapped
766
+ merge_col="Country Region", # Column on which to merge
767
+ name_country=countries, # Plot global map
768
+ name_col="Area (ha)", # Which column to plot
769
+ dir_out=self.dir_plot / str(year), # Output directory
770
+ fname=fname, # Output file name
771
+ label=f"Area (ha)\n{self.crop.title()}, {time_period}",
772
+ vmin=df_time_period["Area (ha)"].min(),
773
+ vmax=df_time_period["Area (ha)"].max(),
774
+ cmap=pal.scientific.sequential.Bamako_20_r,
775
+ series="sequential",
776
+ show_bg=False,
777
+ annotate_regions=True,
778
+ loc_legend="lower left",
779
+ )
690
780
 
691
781
  def plot_metric(self, df, metric="$r^2$"):
692
782
  with plt.style.context("science"):
@@ -788,10 +878,20 @@ class Geoanalysis:
788
878
  "name_shapefile": name_shapefile,
789
879
  }
790
880
 
881
+ shp_file = self.parser.get(country, "boundary_file")
791
882
  self.dg = gpd.read_file(
792
- self.dir_shapefiles / "adm_shapefile.shp",
883
+ self.dir_shapefiles / shp_file,
793
884
  engine="pyogrio",
794
885
  )
886
+ self.admin_col_name = self.parser.get(country, "admin_col_name")
887
+
888
+ # If ADMIN0 or ADM0_NAME is not in the shapefile, then add ADM0_NAME
889
+ if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
890
+ self.dg.loc[:, "ADMIN0"] = country.title().replace("_", " ")
891
+ # if ADMIN1 or ADM1_NAME is not in the shapefile, then rename admin_col_name to ADM1_NAME
892
+ if "ADMIN1" or "ADM1_NAME" not in self.dg.columns:
893
+ if admin_zone == "admin_1":
894
+ self.dg.rename(columns={self.admin_col_name: "ADMIN1"}, inplace=True)
795
895
 
796
896
  # Hack rename Tanzania to United Republic of Tanzania
797
897
  self.dg["ADMIN0"] = self.dg["ADMIN0"].replace(
@@ -813,9 +913,10 @@ class Geoanalysis:
813
913
  self.dg["Country Region"] = self.dg["Country Region"].str.cat(
814
914
  self.dg["ADM1_NAME"], sep=" "
815
915
  )
816
- self.dg.loc[self.dg["ADM2_NAME"].notna(), "Country Region"] = (
817
- self.dg["ADM0_NAME"] + " " + self.dg["ADM2_NAME"]
818
- )
916
+ if "ADM2_NAME" in self.dg.columns:
917
+ self.dg.loc[self.dg["ADM2_NAME"].notna(), "Country Region"] = (
918
+ self.dg["ADM0_NAME"] + " " + self.dg["ADM2_NAME"]
919
+ )
819
920
  # Make it lower case
820
921
  self.dg["Country Region"] = (
821
922
  self.dg["Country Region"].str.lower().replace("_", " ")
@@ -937,7 +1038,7 @@ class RegionalMapper(Geoanalysis):
937
1038
  df_tmp = df_model[
938
1039
  (df_model["% of total Area (ha)"] < 0.5)
939
1040
  & (df_model["Mean Absolute Percentage Error"] > 100)
940
- ]
1041
+ ]
941
1042
 
942
1043
  df_model = df_model.drop(df_tmp.index)
943
1044
 
@@ -947,7 +1048,6 @@ class RegionalMapper(Geoanalysis):
947
1048
  sns.histplot(
948
1049
  group_data["Mean Absolute Percentage Error"],
949
1050
  label=label,
950
- # clip=(0, None),
951
1051
  )
952
1052
 
953
1053
  # Plot a dashed gray line at x=20
@@ -958,6 +1058,17 @@ class RegionalMapper(Geoanalysis):
958
1058
  plt.ylabel("Frequency")
959
1059
  plt.legend(title="Country", title_fontsize="13")
960
1060
 
1061
+ # Adding the title at the top-right corner
1062
+ plt.text(
1063
+ 0.95, 0.95, # Coordinates in axes fraction
1064
+ f"Model: {model}",
1065
+ transform=plt.gca().transAxes,
1066
+ fontsize=14,
1067
+ verticalalignment="top",
1068
+ horizontalalignment="right",
1069
+ bbox=dict(facecolor="white", alpha=0.6, edgecolor="none")
1070
+ )
1071
+
961
1072
  plt.tight_layout()
962
1073
  plt.savefig(self.dir_analysis / f"mape_histogram_{model}.png", dpi=250)
963
1074
  plt.close()
@@ -983,13 +1094,14 @@ class RegionalMapper(Geoanalysis):
983
1094
 
984
1095
  df_model = df_model.drop(df_tmp.index)
985
1096
 
986
- fname = f"mape_{self.crop}_{df_model['Model'].iloc[0]}.png"
987
1097
  col = "Mean Absolute Percentage Error"
988
1098
  countries = df_model["Country"].unique().tolist()
989
1099
  countries = [country.title().replace("_", " ") for country in countries]
1100
+ crop = df_model["Crop"].unique()[0].title().replace('_', ' ')
990
1101
  df = df_model[df_model["Country"].isin(countries)]
991
1102
  self.dg = self.dg[self.dg["ADM0_NAME"].isin(countries)]
992
1103
 
1104
+ fname = f"mape_{crop}_{df_model['Model'].iloc[0]}.png"
993
1105
  plot.plot_df_shpfile(
994
1106
  self.dg,
995
1107
  df,
@@ -1004,7 +1116,7 @@ class RegionalMapper(Geoanalysis):
1004
1116
  cmap=pal.scientific.sequential.Bamako_20_r,
1005
1117
  series="sequential",
1006
1118
  show_bg=False,
1007
- annotate_regions=False,
1119
+ annotate_regions=True,
1008
1120
  loc_legend="lower left",
1009
1121
  )
1010
1122
 
@@ -1028,8 +1140,7 @@ class RegionalMapper(Geoanalysis):
1028
1140
  # Draw a dashed gray line at y=20
1029
1141
  plt.axhline(y=20, color="gray", linestyle="--")
1030
1142
 
1031
- plt.title("Mean Absolute Percentage Error by Year")
1032
- plt.xlabel("Year")
1143
+ plt.xlabel("")
1033
1144
  plt.ylabel("Mean Absolute Percentage Error (%)")
1034
1145
  plt.xticks(rotation=0)
1035
1146
 
@@ -47,7 +47,8 @@ class cei_runner(base.BaseGeo):
47
47
 
48
48
  self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
49
49
  self.base_dir = Path(
50
- r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
50
+ #r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
51
+ r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
51
52
  ) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
52
53
  self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
53
54
 
@@ -310,6 +310,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
310
310
  # Get the selected feature names
311
311
  selected_features = X.columns[selected_features].tolist()
312
312
 
313
+ # print(selected_features)
313
314
  # Filter the dataset for selected features
314
315
  X_filtered = X.loc[:, selected_features]
315
316
 
@@ -1,3 +1,53 @@
1
+ from great_tables import GT, html
2
+ import pandas as pd
3
+
4
+ # Data from the user-provided table
5
+ data = {
6
+ "province": ["Bagmati", "Koshi", "Madhesh", "Gandaki", "Lumbini", "Karnali", "Sudurpashchim"],
7
+ "2023 prediction": [3.738, 3.708, 3.583, 3.726, 3.291, 3.124, 2.607],
8
+ "Avg (2018-2022) - MOA": [3.858, 3.712, 3.668, 3.764, 3.771, 3.371, 3.399],
9
+ "2024 prediction": [3.807, 3.666, 3.691, 3.757, 3.427, 2.827, 2.567],
10
+ }
11
+
12
+ # Create a DataFrame
13
+ df = pd.DataFrame(data)
14
+
15
+ # Create a styled table
16
+ styled_table = (
17
+ GT(df)
18
+ .tab_header(
19
+ title="Predictions and Historical Averages by Province",
20
+ subtitle="Yield predictions for 2023, averages from 2018-2022, and predictions for 2024"
21
+ )
22
+ .cols_label(
23
+ province="Province",
24
+ **{
25
+ "2023 prediction": html("2023<br>Prediction"),
26
+ "Avg (2018-2022) - MOA": html("Avg<br>(2018-2022)<br>MOA"),
27
+ "2024 prediction": html("2024<br>Prediction")
28
+ }
29
+ )
30
+ .cols_width(
31
+ province="2%", # Narrow province column
32
+ **{
33
+ "2023 prediction": "4%",
34
+ "Avg (2018-2022) - MOA": "5%",
35
+ "2024 prediction": "4%"
36
+ }
37
+ )
38
+ )
39
+
40
+ # Save as a PDF
41
+ styled_table.save(
42
+ file="predictions_table.pdf",
43
+ scale=1.0, # Keep the scale reasonable
44
+ web_driver="chrome", # Requires Chrome installed
45
+ window_size=(1200, 800), # Adjust window size to make the table compact
46
+ )
47
+
48
+ print("Table saved as predictions_table.pdf")
49
+
50
+ breakpoint()
1
51
  import pandas as pd
2
52
  import numpy as np
3
53
  import os
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: geocif
3
- Version: 0.1.51
3
+ Version: 0.1.52
4
4
  Summary: Models to visualize and forecast crop conditions and yields
5
5
  Home-page: https://ritviksahajpal.github.io/yield_forecasting/
6
6
  Author: Ritvik Sahajpal
@@ -50,6 +50,6 @@ setup(
50
50
  test_suite="tests",
51
51
  tests_require=test_requirements,
52
52
  url="https://ritviksahajpal.github.io/yield_forecasting/",
53
- version="0.1.51",
53
+ version="0.1.52",
54
54
  zip_safe=False,
55
55
  )
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes