geocif 0.1.51__tar.gz → 0.1.52__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {geocif-0.1.51/geocif.egg-info → geocif-0.1.52}/PKG-INFO +1 -1
- {geocif-0.1.51 → geocif-0.1.52}/geocif/analysis.py +221 -110
- {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner_v2.py +2 -1
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/feature_selection.py +1 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/aa.py +50 -0
- {geocif-0.1.51 → geocif-0.1.52/geocif.egg-info}/PKG-INFO +1 -1
- {geocif-0.1.51 → geocif-0.1.52}/setup.py +1 -1
- {geocif-0.1.51 → geocif-0.1.52}/LICENSE +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/MANIFEST.in +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/README.md +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/geoagmet.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/plot.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/agmet/utils.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/constants.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/features.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/geo.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/geocif.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/metadata.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/backup/models.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/definitions.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/cei/indices.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/experiments.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/geocif.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/geocif_runner.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/indices_runner_v3.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/logger.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/correlations.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/embedding.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/feature_engineering.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/outliers.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/outlook.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/output.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/spatial_autocorrelation.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/stages.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/stats.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/trainers.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/trend.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/ml/xai.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/mm.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/automl.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/download_esi.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/enso.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/gamtest.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/misc.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/sustain.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/test_catboost.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp2.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp3.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp4.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/playground/tmp5.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/risk/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/risk/impact_assessment.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/utils.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/viz/__init__.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif/viz/plot.py +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/SOURCES.txt +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/dependency_links.txt +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/not-zip-safe +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/geocif.egg-info/top_level.txt +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/requirements.txt +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/setup.cfg +0 -0
- {geocif-0.1.51 → geocif-0.1.52}/tests/test_geocif.py +0 -0
@@ -165,7 +165,6 @@ class Geoanalysis:
|
|
165
165
|
df_metrics = self._process_metrics(df_metrics)
|
166
166
|
|
167
167
|
self._plot_metrics(df_metrics)
|
168
|
-
|
169
168
|
df_regional_metrics_by_year = self._compute_regional_metrics(
|
170
169
|
df, by="Harvest Year"
|
171
170
|
)
|
@@ -177,9 +176,9 @@ class Geoanalysis:
|
|
177
176
|
self._store_results(
|
178
177
|
df_metrics, df_regional_metrics, df_regional_metrics_by_year
|
179
178
|
)
|
180
|
-
|
181
179
|
df_national_yield = self._compute_national_yield(df)
|
182
180
|
self._plot_national_yield(df_national_yield)
|
181
|
+
self._plot_regional_yield_scatter(df)
|
183
182
|
|
184
183
|
return df_metrics, df_regional_metrics, df_national_yield
|
185
184
|
|
@@ -250,12 +249,15 @@ class Geoanalysis:
|
|
250
249
|
else:
|
251
250
|
return df.groupby(cols).apply(self.regional_metrics).reset_index()
|
252
251
|
|
253
|
-
def _select_top_years(self, df_regional_metrics):
|
254
|
-
|
255
|
-
df_regional_metrics
|
256
|
-
|
257
|
-
|
258
|
-
|
252
|
+
def _select_top_years(self, df_regional_metrics, top_N=-1):
|
253
|
+
if top_N == -1:
|
254
|
+
return df_regional_metrics
|
255
|
+
else:
|
256
|
+
return (
|
257
|
+
df_regional_metrics.groupby(["Country", "Region"])
|
258
|
+
.apply(lambda x: self.select_top_N_years(x, 10))
|
259
|
+
.reset_index(drop=True)
|
260
|
+
)
|
259
261
|
|
260
262
|
def _average_mape(self, df_regional_metrics):
|
261
263
|
cols = [
|
@@ -338,18 +340,29 @@ class Geoanalysis:
|
|
338
340
|
|
339
341
|
con.commit()
|
340
342
|
|
341
|
-
def _compute_national_yield(self,
|
342
|
-
#
|
343
|
+
def _compute_national_yield(self, df_region):
|
344
|
+
# Define column names
|
343
345
|
observed = "Observed Yield (tn per ha)"
|
344
346
|
predicted = "Predicted Yield (tn per ha)"
|
345
347
|
area_ha = "Area (ha)"
|
346
348
|
|
347
|
-
|
348
|
-
|
349
|
+
df_tmp = df_region.copy()
|
350
|
+
|
351
|
+
# Fill missing area values with the median area of the same country
|
352
|
+
df_tmp[area_ha] = df_tmp.groupby("Country")[area_ha].transform(lambda x: x.fillna(x.median()))
|
353
|
+
|
354
|
+
# Log that we are filling missing values with the median
|
355
|
+
self.logger.info(
|
356
|
+
f"Filling missing values in {area_ha} with the median for each country"
|
357
|
+
)
|
358
|
+
|
359
|
+
# Compute observed and predicted national yield by multiplying Yield (tn per ha) by Area (ha)
|
360
|
+
df_tmp[observed] = df_tmp[observed] * df_tmp[area_ha]
|
361
|
+
df_tmp[predicted] = df_tmp[predicted] * df_tmp[area_ha]
|
349
362
|
|
350
363
|
# Group by Country and Harvest Year, then sum the National Yield and Area
|
351
364
|
df_national_yield = (
|
352
|
-
|
365
|
+
df_tmp.groupby(["Country", "Harvest Year"])
|
353
366
|
.agg({observed: "sum", predicted: "sum", area_ha: "sum"})
|
354
367
|
.reset_index()
|
355
368
|
)
|
@@ -364,53 +377,116 @@ class Geoanalysis:
|
|
364
377
|
|
365
378
|
return df_national_yield
|
366
379
|
|
367
|
-
def
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
)
|
380
|
+
def _plot_regional_yield_scatter(self, df):
    """
    Plot observed vs predicted yield for all regions and all years.

    Every usable row of ``df`` becomes one point coloured by harvest year.
    A 1:1 reference line and an RMSE / MAPE / r2 annotation are drawn, and
    the figure is written to ``self.dir_analysis`` as
    ``scatter_all_regions_{country}_{crop}.png``.

    Args:
        df: DataFrame holding the "Harvest Year",
            "Observed Yield (tn per ha)" and "Predicted Yield (tn per ha)"
            columns. It is not modified; the method works on a copy.

    Returns:
        None. Nothing is plotted when no row has all three values present.
    """
    from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error

    year_col = "Harvest Year"
    observed_col = "Observed Yield (tn per ha)"
    predicted_col = "Predicted Yield (tn per ha)"

    # Copy only the needed columns so the numeric coercion below does not
    # overwrite the caller's "Harvest Year" column
    df_plot = df[[year_col, observed_col, predicted_col]].copy()
    df_plot[year_col] = pd.to_numeric(df_plot[year_col], errors="coerce")

    # The sklearn metrics raise on NaN input, and a row without a year cannot
    # be placed on the colour scale, so incomplete rows are left out
    df_plot = df_plot.dropna()
    if df_plot.empty:
        self.logger.warning(
            "No rows with year, observed and predicted yield; skipping regional scatter plot"
        )
        return

    y_observed = df_plot[observed_col]
    y_predicted = df_plot[predicted_col]
    years = df_plot[year_col]

    # Shared colour scale so the points and the colourbar agree
    cmap = plt.cm.viridis
    norm = plt.Normalize(vmin=years.min(), vmax=years.max())

    with plt.style.context("science"):
        fig, ax = plt.subplots(figsize=(10, 6))

        # Add gridlines
        ax.grid(True, linestyle="--", alpha=0.5)

        # Let matplotlib map years to colours; the returned collection also
        # serves as the mappable for the colourbar below
        scatter = ax.scatter(
            y_observed, y_predicted, c=years, cmap=cmap, norm=norm, s=50
        )

        # 1:1 diagonal; both axes share the same padded upper limit
        max_yield = max(y_observed.max(), y_predicted.max()) * 1.25
        ax.plot([0, max_yield], [0, max_yield], color="gray", linestyle="--")

        # Calculate and display metrics
        rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
        mape = mean_absolute_percentage_error(y_observed, y_predicted)
        r2 = r2_score(y_observed, y_predicted)

        textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\n$r^2$: {r2:.2f}"
        ax.annotate(
            textstr,
            xy=(0.05, 0.95),
            xycoords="axes fraction",
            fontsize=12,
            verticalalignment="top",
        )

        # Set axis limits and labels
        ax.set_xlabel("Observed Yield (tn/ha)")
        ax.set_ylabel("Predicted Yield (tn/ha)")
        ax.set_xlim(0, max_yield)
        ax.set_ylim(0, max_yield)

        # Colourbar for years
        cbar = fig.colorbar(scatter, ax=ax, aspect=50, pad=0.02)
        cbar.set_label("Harvest Year")

        # Up to 5 equispaced integer ticks; integer truncation produces
        # repeats when the span is under four years, so de-duplicate
        ticks = np.unique(np.linspace(years.min(), years.max(), 5, dtype=int))
        cbar.set_ticks(ticks)
        cbar.ax.set_yticklabels([str(tick) for tick in ticks])

        fig.tight_layout()

        # Save and release this specific figure rather than the implicit
        # "current" one
        fname = f"scatter_all_regions_{self.country}_{self.crop}.png"
        fig.savefig(self.dir_analysis / fname, dpi=250)
        plt.close(fig)
|
450
|
+
|
451
|
+
def _plot_national_yield(self, df_national_yield):
|
452
|
+
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
|
453
|
+
|
454
|
+
# Ensure 'Harvest Year' is numeric
|
455
|
+
df_national_yield["Harvest Year"] = pd.to_numeric(df_national_yield["Harvest Year"], errors="coerce")
|
456
|
+
|
457
|
+
# Extract data
|
458
|
+
x = df_national_yield["Harvest Year"]
|
459
|
+
y_observed = df_national_yield["Observed Yield (tn per ha)"]
|
460
|
+
y_predicted = df_national_yield["Predicted Yield (tn per ha)"]
|
461
|
+
|
462
|
+
# Generate colors for years
|
463
|
+
cmap = plt.cm.viridis # Colormap for years
|
464
|
+
norm = plt.Normalize(vmin=x.min(), vmax=x.max()) # Normalize years to colormap
|
465
|
+
colors = [cmap(norm(year)) for year in x]
|
466
|
+
|
467
|
+
# Create the plot
|
468
|
+
with plt.style.context("science"):
|
469
|
+
fig, ax = plt.subplots(figsize=(10, 6)) # Explicitly define axes
|
391
470
|
|
392
|
-
# X and Y-axis range from 0 to the maximum observed/predicted yield * 1.1
|
393
471
|
max_yield = max(y_observed.max(), y_predicted.max()) * 1.25
|
394
|
-
plt.xlim(0, max_yield)
|
395
|
-
plt.ylim(0, max_yield)
|
396
472
|
|
397
|
-
# Add
|
398
|
-
|
473
|
+
# Add gridlines
|
474
|
+
ax.grid(True, linestyle="--", alpha=0.5)
|
399
475
|
|
400
|
-
#
|
476
|
+
# Scatter plot with uniform size and dynamic colors
|
477
|
+
for year, obs, pred, color in zip(x, y_observed, y_predicted, colors):
|
478
|
+
ax.scatter(obs, pred, color=color, s=50, label=year)
|
479
|
+
|
480
|
+
# Add 1:1 diagonal line
|
481
|
+
ax.plot([0, max_yield], [0, max_yield], color="gray", linestyle="--")
|
482
|
+
|
483
|
+
# Calculate and display metrics
|
401
484
|
rmse = np.sqrt(mean_squared_error(y_observed, y_predicted))
|
402
485
|
mape = mean_absolute_percentage_error(y_observed, y_predicted)
|
403
486
|
r2 = r2_score(y_observed, y_predicted)
|
404
487
|
|
405
|
-
|
406
|
-
|
407
|
-
(
|
408
|
-
f"RMSE: {rmse:.2f} tn/ha",
|
409
|
-
f"MAPE: {mape:.2%}",
|
410
|
-
f"R²: {r2:.2f}",
|
411
|
-
)
|
412
|
-
)
|
413
|
-
plt.gca().annotate(
|
488
|
+
textstr = f"RMSE: {rmse:.2f} tn/ha\nMAPE: {mape:.2%}\nr²: {r2:.2f}"
|
489
|
+
ax.annotate(
|
414
490
|
textstr,
|
415
491
|
xy=(0.05, 0.95),
|
416
492
|
xycoords="axes fraction",
|
@@ -418,17 +494,26 @@ class Geoanalysis:
|
|
418
494
|
verticalalignment="top",
|
419
495
|
)
|
420
496
|
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
)
|
497
|
+
# Set axis limits and labels
|
498
|
+
ax.set_xlabel("Observed Yield (tn/ha)")
|
499
|
+
ax.set_ylabel("Predicted Yield (tn/ha)")
|
500
|
+
ax.set_xlim(0, max_yield)
|
501
|
+
ax.set_ylim(0, max_yield)
|
502
|
+
|
503
|
+
# Add legend for years
|
504
|
+
sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
|
505
|
+
sm.set_array([])
|
506
|
+
cbar = fig.colorbar(sm, ax=ax, aspect=50, pad=0.02) # Specify the axis explicitly
|
507
|
+
cbar.set_label("Harvest Year")
|
508
|
+
|
509
|
+
# Set equispaced ticks for exactly 5 points
|
510
|
+
ticks = np.linspace(x.min(), x.max(), 5, dtype=int) # 5 equispaced ticks
|
511
|
+
cbar.set_ticks(ticks)
|
512
|
+
cbar.ax.set_yticklabels([str(tick) for tick in ticks])
|
513
|
+
|
430
514
|
plt.tight_layout()
|
431
515
|
|
516
|
+
# Save the plot
|
432
517
|
fname = f"scatter_{self.country}_{self.crop}.png"
|
433
518
|
plt.savefig(self.dir_analysis / fname, dpi=250)
|
434
519
|
plt.close()
|
@@ -535,7 +620,11 @@ class Geoanalysis:
|
|
535
620
|
"ADM1_NAME" if self.admin_zone == "admin_1" else "ADM2_NAME"
|
536
621
|
)
|
537
622
|
analysis_years = df_model["Harvest Year"].unique()
|
538
|
-
|
623
|
+
pbar = tqdm(analysis_years, leave=False)
|
624
|
+
for idx, year in enumerate(pbar):
|
625
|
+
pbar.set_description(f"Map {year}")
|
626
|
+
pbar.update()
|
627
|
+
|
539
628
|
df_harvest_year = df_model[df_model["Harvest Year"] == year]
|
540
629
|
|
541
630
|
for time_period in tqdm(
|
@@ -563,12 +652,12 @@ class Geoanalysis:
|
|
563
652
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
564
653
|
series="sequential",
|
565
654
|
show_bg=False,
|
566
|
-
annotate_regions=
|
655
|
+
annotate_regions=True,
|
567
656
|
annotate_region_column=annotate_region_column,
|
568
657
|
loc_legend="lower left",
|
569
658
|
)
|
570
659
|
#
|
571
|
-
|
660
|
+
""" Unique regions """
|
572
661
|
fname = f"{self.country}_{self.crop}_region_ID.png"
|
573
662
|
col = "Region_ID"
|
574
663
|
df_model[col] = df_model[col].astype(int) + 1
|
@@ -578,27 +667,28 @@ class Geoanalysis:
|
|
578
667
|
int(key): key
|
579
668
|
for key in df_time_period["Region_ID"].unique()
|
580
669
|
}
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
670
|
+
|
671
|
+
# plot.plot_df_shpfile(
|
672
|
+
# self.dg, # dataframe containing adm1 name and polygon
|
673
|
+
# df_model, # dataframe containing information that will be mapped
|
674
|
+
# dict_lup=dict_region,
|
675
|
+
# merge_col="Country Region", # Column on which to merge
|
676
|
+
# name_country=countries, # Plot global map
|
677
|
+
# name_col=col, # Which column to plot
|
678
|
+
# dir_out=self.dir_plot / str(year), # Output directory
|
679
|
+
# fname=fname, # Output file name
|
680
|
+
# label=f"Region Cluster\n{self.crop.title()}",
|
681
|
+
# vmin=df_model[col].min(),
|
682
|
+
# vmax=df_model[col].max(),
|
683
|
+
# cmap=pal.tableau.Tableau_20.mpl_colors,
|
684
|
+
# series="qualitative",
|
685
|
+
# show_bg=False,
|
686
|
+
# alpha_feature=1,
|
687
|
+
# use_key=True,
|
688
|
+
# annotate_regions=True,
|
689
|
+
# annotate_region_column=annotate_region_column,
|
690
|
+
# loc_legend="lower left",
|
691
|
+
# )
|
602
692
|
# breakpoint()
|
603
693
|
|
604
694
|
# """ Anomaly """
|
@@ -619,7 +709,7 @@ class Geoanalysis:
|
|
619
709
|
# cmap=pal.cartocolors.diverging.Geyser_5_r,
|
620
710
|
# series="sequential",
|
621
711
|
# show_bg=False,
|
622
|
-
# annotate_regions=
|
712
|
+
# annotate_regions=True,
|
623
713
|
# annotate_region_column=annotate_region_column,
|
624
714
|
# loc_legend="lower left",
|
625
715
|
# )
|
@@ -640,7 +730,7 @@ class Geoanalysis:
|
|
640
730
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
641
731
|
series="sequential",
|
642
732
|
show_bg=False,
|
643
|
-
annotate_regions=
|
733
|
+
annotate_regions=True,
|
644
734
|
annotate_region_column=annotate_region_column,
|
645
735
|
loc_legend="lower left",
|
646
736
|
)
|
@@ -661,32 +751,32 @@ class Geoanalysis:
|
|
661
751
|
# cmap=pal.scientific.sequential.Bamako_20_r,
|
662
752
|
# series="sequential",
|
663
753
|
# show_bg=False,
|
664
|
-
# annotate_regions=
|
754
|
+
# annotate_regions=True,
|
665
755
|
# annotate_region_column=annotate_region_column,
|
666
756
|
# loc_legend="lower left",
|
667
757
|
# )
|
668
758
|
|
669
759
|
# Area
|
670
760
|
# breakpoint()
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
761
|
+
if df_time_period["Area (ha)"].notna().all():
|
762
|
+
fname = f"{self.country}_{self.crop}_{year}_area.png"
|
763
|
+
plot.plot_df_shpfile(
|
764
|
+
self.dg, # dataframe containing adm1 name and polygon
|
765
|
+
df_time_period, # dataframe containing information that will be mapped
|
766
|
+
merge_col="Country Region", # Column on which to merge
|
767
|
+
name_country=countries, # Plot global map
|
768
|
+
name_col="Area (ha)", # Which column to plot
|
769
|
+
dir_out=self.dir_plot / str(year), # Output directory
|
770
|
+
fname=fname, # Output file name
|
771
|
+
label=f"Area (ha)\n{self.crop.title()}, {time_period}",
|
772
|
+
vmin=df_time_period["Area (ha)"].min(),
|
773
|
+
vmax=df_time_period["Area (ha)"].max(),
|
774
|
+
cmap=pal.scientific.sequential.Bamako_20_r,
|
775
|
+
series="sequential",
|
776
|
+
show_bg=False,
|
777
|
+
annotate_regions=True,
|
778
|
+
loc_legend="lower left",
|
779
|
+
)
|
690
780
|
|
691
781
|
def plot_metric(self, df, metric="$r^2$"):
|
692
782
|
with plt.style.context("science"):
|
@@ -788,10 +878,20 @@ class Geoanalysis:
|
|
788
878
|
"name_shapefile": name_shapefile,
|
789
879
|
}
|
790
880
|
|
881
|
+
shp_file = self.parser.get(country, "boundary_file")
|
791
882
|
self.dg = gpd.read_file(
|
792
|
-
self.dir_shapefiles /
|
883
|
+
self.dir_shapefiles / shp_file,
|
793
884
|
engine="pyogrio",
|
794
885
|
)
|
886
|
+
self.admin_col_name = self.parser.get(country, "admin_col_name")
|
887
|
+
|
888
|
+
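# If neither ADMIN0 nor ADM0_NAME is in the shapefile, add an ADMIN0 column holding the country name
# NOTE(review): `"ADMIN0" or ...` is always truthy, so the branch below runs unconditionally and overwrites any existing ADMIN0 column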
|
889
|
+
if "ADMIN0" or "ADM0_NAME" not in self.dg.columns:
|
890
|
+
self.dg.loc[:, "ADMIN0"] = country.title().replace("_", " ")
|
891
|
+
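# If neither ADMIN1 nor ADM1_NAME is in the shapefile, rename admin_col_name to ADMIN1 (admin_1 zones only)
# NOTE(review): `"ADMIN1" or ...` is always truthy, so only the admin_zone check below actually gates the rename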
|
892
|
+
if "ADMIN1" or "ADM1_NAME" not in self.dg.columns:
|
893
|
+
if admin_zone == "admin_1":
|
894
|
+
self.dg.rename(columns={self.admin_col_name: "ADMIN1"}, inplace=True)
|
795
895
|
|
796
896
|
# Hack rename Tanzania to United Republic of Tanzania
|
797
897
|
self.dg["ADMIN0"] = self.dg["ADMIN0"].replace(
|
@@ -813,9 +913,10 @@ class Geoanalysis:
|
|
813
913
|
self.dg["Country Region"] = self.dg["Country Region"].str.cat(
|
814
914
|
self.dg["ADM1_NAME"], sep=" "
|
815
915
|
)
|
816
|
-
|
817
|
-
self.dg["
|
818
|
-
|
916
|
+
if "ADM2_NAME" in self.dg.columns:
|
917
|
+
self.dg.loc[self.dg["ADM2_NAME"].notna(), "Country Region"] = (
|
918
|
+
self.dg["ADM0_NAME"] + " " + self.dg["ADM2_NAME"]
|
919
|
+
)
|
819
920
|
# Make it lower case
|
820
921
|
self.dg["Country Region"] = (
|
821
922
|
self.dg["Country Region"].str.lower().replace("_", " ")
|
@@ -937,7 +1038,7 @@ class RegionalMapper(Geoanalysis):
|
|
937
1038
|
df_tmp = df_model[
|
938
1039
|
(df_model["% of total Area (ha)"] < 0.5)
|
939
1040
|
& (df_model["Mean Absolute Percentage Error"] > 100)
|
940
|
-
|
1041
|
+
]
|
941
1042
|
|
942
1043
|
df_model = df_model.drop(df_tmp.index)
|
943
1044
|
|
@@ -947,7 +1048,6 @@ class RegionalMapper(Geoanalysis):
|
|
947
1048
|
sns.histplot(
|
948
1049
|
group_data["Mean Absolute Percentage Error"],
|
949
1050
|
label=label,
|
950
|
-
# clip=(0, None),
|
951
1051
|
)
|
952
1052
|
|
953
1053
|
# Plot a dashed gray line at x=20
|
@@ -958,6 +1058,17 @@ class RegionalMapper(Geoanalysis):
|
|
958
1058
|
plt.ylabel("Frequency")
|
959
1059
|
plt.legend(title="Country", title_fontsize="13")
|
960
1060
|
|
1061
|
+
# Adding the title at the top-right corner
|
1062
|
+
plt.text(
|
1063
|
+
0.95, 0.95, # Coordinates in axes fraction
|
1064
|
+
f"Model: {model}",
|
1065
|
+
transform=plt.gca().transAxes,
|
1066
|
+
fontsize=14,
|
1067
|
+
verticalalignment="top",
|
1068
|
+
horizontalalignment="right",
|
1069
|
+
bbox=dict(facecolor="white", alpha=0.6, edgecolor="none")
|
1070
|
+
)
|
1071
|
+
|
961
1072
|
plt.tight_layout()
|
962
1073
|
plt.savefig(self.dir_analysis / f"mape_histogram_{model}.png", dpi=250)
|
963
1074
|
plt.close()
|
@@ -983,13 +1094,14 @@ class RegionalMapper(Geoanalysis):
|
|
983
1094
|
|
984
1095
|
df_model = df_model.drop(df_tmp.index)
|
985
1096
|
|
986
|
-
fname = f"mape_{self.crop}_{df_model['Model'].iloc[0]}.png"
|
987
1097
|
col = "Mean Absolute Percentage Error"
|
988
1098
|
countries = df_model["Country"].unique().tolist()
|
989
1099
|
countries = [country.title().replace("_", " ") for country in countries]
|
1100
|
+
crop = df_model["Crop"].unique()[0].title().replace('_', ' ')
|
990
1101
|
df = df_model[df_model["Country"].isin(countries)]
|
991
1102
|
self.dg = self.dg[self.dg["ADM0_NAME"].isin(countries)]
|
992
1103
|
|
1104
|
+
fname = f"mape_{crop}_{df_model['Model'].iloc[0]}.png"
|
993
1105
|
plot.plot_df_shpfile(
|
994
1106
|
self.dg,
|
995
1107
|
df,
|
@@ -1004,7 +1116,7 @@ class RegionalMapper(Geoanalysis):
|
|
1004
1116
|
cmap=pal.scientific.sequential.Bamako_20_r,
|
1005
1117
|
series="sequential",
|
1006
1118
|
show_bg=False,
|
1007
|
-
annotate_regions=
|
1119
|
+
annotate_regions=True,
|
1008
1120
|
loc_legend="lower left",
|
1009
1121
|
)
|
1010
1122
|
|
@@ -1028,8 +1140,7 @@ class RegionalMapper(Geoanalysis):
|
|
1028
1140
|
# Draw a dashed gray line at y=20
|
1029
1141
|
plt.axhline(y=20, color="gray", linestyle="--")
|
1030
1142
|
|
1031
|
-
plt.
|
1032
|
-
plt.xlabel("Year")
|
1143
|
+
plt.xlabel("")
|
1033
1144
|
plt.ylabel("Mean Absolute Percentage Error (%)")
|
1034
1145
|
plt.xticks(rotation=0)
|
1035
1146
|
|
@@ -47,7 +47,8 @@ class cei_runner(base.BaseGeo):
|
|
47
47
|
|
48
48
|
self.dir_input = Path(self.parser.get("PATHS", "dir_input"))
|
49
49
|
self.base_dir = Path(
|
50
|
-
r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
|
50
|
+
#r"D:\Users\ritvik\projects\GEOGLAM\Output\countries\nepal"
|
51
|
+
r"/gpfs/data1/cmongp1/GEOGLAM/Output/countries/nepal"
|
51
52
|
) # Path(self.parser.get("PATHS", "dir_crop_inputs"))
|
52
53
|
self.do_parallel = self.parser.getboolean("DEFAULT", "do_parallel")
|
53
54
|
|
@@ -310,6 +310,7 @@ def select_features(X, y, method="RFE", min_features_to_select=3, threshold_nan=
|
|
310
310
|
# Get the selected feature names
|
311
311
|
selected_features = X.columns[selected_features].tolist()
|
312
312
|
|
313
|
+
# print(selected_features)
|
313
314
|
# Filter the dataset for selected features
|
314
315
|
X_filtered = X.loc[:, selected_features]
|
315
316
|
|
@@ -1,3 +1,53 @@
|
|
1
|
+
from great_tables import GT, html
|
2
|
+
import pandas as pd
|
3
|
+
|
4
|
+
# Data from the user-provided table
|
5
|
+
data = {
|
6
|
+
"province": ["Bagmati", "Koshi", "Madhesh", "Gandaki", "Lumbini", "Karnali", "Sudurpashchim"],
|
7
|
+
"2023 prediction": [3.738, 3.708, 3.583, 3.726, 3.291, 3.124, 2.607],
|
8
|
+
"Avg (2018-2022) - MOA": [3.858, 3.712, 3.668, 3.764, 3.771, 3.371, 3.399],
|
9
|
+
"2024 prediction": [3.807, 3.666, 3.691, 3.757, 3.427, 2.827, 2.567],
|
10
|
+
}
|
11
|
+
|
12
|
+
# Create a DataFrame
|
13
|
+
df = pd.DataFrame(data)
|
14
|
+
|
15
|
+
# Create a styled table
|
16
|
+
styled_table = (
|
17
|
+
GT(df)
|
18
|
+
.tab_header(
|
19
|
+
title="Predictions and Historical Averages by Province",
|
20
|
+
subtitle="Yield predictions for 2023, averages from 2018-2022, and predictions for 2024"
|
21
|
+
)
|
22
|
+
.cols_label(
|
23
|
+
province="Province",
|
24
|
+
**{
|
25
|
+
"2023 prediction": html("2023<br>Prediction"),
|
26
|
+
"Avg (2018-2022) - MOA": html("Avg<br>(2018-2022)<br>MOA"),
|
27
|
+
"2024 prediction": html("2024<br>Prediction")
|
28
|
+
}
|
29
|
+
)
|
30
|
+
.cols_width(
|
31
|
+
province="2%", # Narrow province column
|
32
|
+
**{
|
33
|
+
"2023 prediction": "4%",
|
34
|
+
"Avg (2018-2022) - MOA": "5%",
|
35
|
+
"2024 prediction": "4%"
|
36
|
+
}
|
37
|
+
)
|
38
|
+
)
|
39
|
+
|
40
|
+
# Save as a PDF
|
41
|
+
styled_table.save(
|
42
|
+
file="predictions_table.pdf",
|
43
|
+
scale=1.0, # Keep the scale reasonable
|
44
|
+
web_driver="chrome", # Requires Chrome installed
|
45
|
+
window_size=(1200, 800), # Adjust window size to make the table compact
|
46
|
+
)
|
47
|
+
|
48
|
+
print("Table saved as predictions_table.pdf")
|
49
|
+
|
50
|
+
breakpoint()
|
1
51
|
import pandas as pd
|
2
52
|
import numpy as np
|
3
53
|
import os
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|