imergpy 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- imergpy/__init__.py +12 -0
- imergpy/analyzer.py +87 -0
- imergpy/cli.py +13 -0
- imergpy/config.py +27 -0
- imergpy/core.py +174 -0
- imergpy/downloader.py +128 -0
- imergpy/plotter.py +49 -0
- imergpy/processor.py +157 -0
- imergpy/server.py +126 -0
- imergpy/static/logo.png +0 -0
- imergpy/templates/index.html +1277 -0
- imergpy-1.1.0.dist-info/METADATA +169 -0
- imergpy-1.1.0.dist-info/RECORD +17 -0
- imergpy-1.1.0.dist-info/WHEEL +5 -0
- imergpy-1.1.0.dist-info/entry_points.txt +2 -0
- imergpy-1.1.0.dist-info/licenses/LICENSE +21 -0
- imergpy-1.1.0.dist-info/top_level.txt +1 -0
imergpy/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
# imergpy package
|
|
2
|
+
from .core import get_precipitation
|
|
3
|
+
from .plotter import plot_from_excel
|
|
4
|
+
from .analyzer import add_accumulation, resample_data, calculate_statistics
|
|
5
|
+
|
|
6
|
+
# Public API of the package: the names imported above from .core, .plotter
# and .analyzer that `from imergpy import *` exposes.
__all__ = [
    "get_precipitation",
    "plot_from_excel",
    "add_accumulation",
    "resample_data",
    "calculate_statistics"
]
|
imergpy/analyzer.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
# Column names that may hold the precipitation values. _precip_column()
# checks them in this order and uses the first one present in the DataFrame,
# so the order is significant.
PRECIP_COLUMNS = [
    "Precipitation_mm_per_half_hour",
    "Precipitation_mm_per_day",
    "Precipitation_mm_per_month",
    "Precipitation_mm",
    "Precipitation_mm_hr",
]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _find_column(df, candidates, label):
|
|
14
|
+
for column in candidates:
|
|
15
|
+
if column in df.columns:
|
|
16
|
+
return column
|
|
17
|
+
raise ValueError(f"Could not find {label}. Expected one of: {', '.join(candidates)}")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _time_column(df):
    """
    Return the name of the column holding each row's start time.

    "Start Time" (with a space) is accepted in addition to "Start_Time"
    because that is the header written to the exported Excel file, so a
    workbook read back with ``pd.read_excel`` can be analysed and plotted.

    Raises:
        ValueError: If none of the known time column names is present.
    """
    return _find_column(df, ["Start_Time", "Start Time", "Time"], "time column")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _precip_column(df):
    """
    Return the name of the precipitation column of ``df``.

    The names in PRECIP_COLUMNS are tried in order; the first match wins.

    Raises:
        ValueError: If none of the names in PRECIP_COLUMNS is present.
    """
    column_name = _find_column(df, PRECIP_COLUMNS, "precipitation column")
    return column_name
|
|
26
|
+
|
|
27
|
+
def add_accumulation(df):
    """
    Return a copy of ``df`` with two extra precipitation columns.

    Added columns:
        'Absolute_Precip_mm': the precipitation value of each row. When the
            source column is 'Precipitation_mm_hr' the value is multiplied
            by 0.5; any other precipitation column is copied as-is.
        'Cumulative_Precip_mm': running sum of 'Absolute_Precip_mm'.

    The input DataFrame is not modified.

    Raises:
        ValueError: If no known precipitation column is present.
    """
    out = df.copy()
    source = _precip_column(out)

    amounts = out[source] * 0.5 if source == "Precipitation_mm_hr" else out[source]
    out['Absolute_Precip_mm'] = amounts
    out['Cumulative_Precip_mm'] = out['Absolute_Precip_mm'].cumsum()
    return out
|
|
42
|
+
|
|
43
|
+
def resample_data(df, freq='D'):
    """
    Resample precipitation data to totals per period.

    Args:
        df: Pandas DataFrame from IMERG excel. 'Absolute_Precip_mm' is added
            via add_accumulation() when it is not already present.
        freq: Pandas offset alias, e.g. 'D' for daily or 'M' for monthly.
            'M' is mapped to the month-end alias 'ME' where pandas supports
            it, because recent pandas releases deprecate the bare 'M' alias;
            older releases that do not know 'ME' still receive 'M'.

    Returns:
        DataFrame with the time column and a 'Total_Precip_mm' column holding
        the summed precipitation of each period. The input is not modified.

    Raises:
        ValueError: If the time or precipitation column cannot be found.
    """
    df = df.copy()
    if 'Absolute_Precip_mm' not in df.columns:
        df = add_accumulation(df)

    time_col = _time_column(df)
    df[time_col] = pd.to_datetime(df[time_col])
    amounts = df.set_index(time_col)[['Absolute_Precip_mm']]

    if freq == 'M':
        try:
            resampled = amounts.resample('ME').sum()
        except ValueError:
            # pandas releases that predate the 'ME' alias reject it.
            resampled = amounts.resample('M').sum()
    else:
        resampled = amounts.resample(freq).sum()

    resampled = resampled.rename(columns={'Absolute_Precip_mm': 'Total_Precip_mm'})
    return resampled.reset_index()
|
|
65
|
+
|
|
66
|
+
def calculate_statistics(df):
    """
    Summarise rainfall totals, maxima and daily-threshold counts.

    'Absolute_Precip_mm' is added via add_accumulation() when missing, and
    daily totals are obtained from resample_data(df, freq='D').

    Returns:
        dict with the total rainfall, the largest per-row and per-day
        amounts, the number of days analysed, and the number of days below
        1 mm, at or above 1 mm, above 25 mm and above 50 mm.
    """
    if 'Absolute_Precip_mm' not in df.columns:
        df = add_accumulation(df)

    per_interval = df['Absolute_Precip_mm']
    # Daily totals drive all threshold counts below.
    per_day = resample_data(df, freq='D')['Total_Precip_mm']

    def _days(flags):
        # Number of days for which the boolean condition holds.
        return int(flags.sum())

    return {
        "Total_Rainfall_mm": float(per_interval.sum()),
        "Max_Interval_Precip_mm": float(per_interval.max()),
        "Max_Daily_Rainfall_mm": float(per_day.max()),
        "Total_Days_Analyzed": int(len(per_day)),
        "Dry_Days_(<1mm)": _days(per_day < 1.0),
        "Wet_Days_(>=1mm)": _days(per_day >= 1.0),
        "Heavy_Rain_Days_(>25mm)": _days(per_day > 25.0),
        "Extreme_Rain_Days_(>50mm)": _days(per_day > 50.0)
    }
|
imergpy/cli.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from .server import start_server
|
|
3
|
+
|
|
4
|
+
def main():
    """Run the imergpy server until it stops or the user presses Ctrl+C.

    A KeyboardInterrupt is turned into a clean exit with status 0 after a
    short shutdown message is printed.
    """
    try:
        start_server()
    except KeyboardInterrupt:
        print("\nShutting down imergpy interface...")
        raise SystemExit(0)


if __name__ == "__main__":
    main()
|
imergpy/config.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# The 48 half-hour time strings used by GES DISC OTF, in chronological order.
# Each entry is "S<HHMMSS start>-E<HHMMSS end>.<minutes since midnight>",
# e.g. "S003000-E005959.0030" for the slot starting at 00:30.
IMERG_TIMES = [
    f"S{hour:02d}{start_min:02d}00-E{hour:02d}{start_min + 29:02d}59.{hour * 60 + start_min:04d}"
    for hour in range(24)
    for start_min in (0, 30)
]
|
|
16
|
+
|
|
17
|
+
def get_time_string(dt_obj):
    """
    Return the IMERG_TIMES entry for the half hour containing ``dt_obj``.

    Only ``dt_obj.hour`` and ``dt_obj.minute`` are read. Minutes 0-29 select
    the first half-hour slot of the hour, minutes 30-59 the second.
    """
    # Two slots per hour; integer division picks the half within the hour.
    slot = 2 * dt_obj.hour + dt_obj.minute // 30
    return IMERG_TIMES[slot]
|
imergpy/core.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
from dateutil.relativedelta import relativedelta
|
|
6
|
+
from .downloader import DownloadError, EarthdataDownloader
|
|
7
|
+
from .processor import extract_area_average, extract_precipitation
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Accepted values for the run_type, freq and interp_method arguments;
# _validate_inputs() rejects anything outside these sets.
VALID_RUN_TYPES = {"early", "late", "final"}
VALID_FREQUENCIES = {"hhr", "daily", "monthly"}
VALID_INTERPOLATION_METHODS = {"nearest", "linear", "cubic"}
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _parse_datetime(value):
|
|
16
|
+
value = str(value).replace('T', ' ')
|
|
17
|
+
formats = {
|
|
18
|
+
7: "%Y-%m",
|
|
19
|
+
10: "%Y-%m-%d",
|
|
20
|
+
16: "%Y-%m-%d %H:%M",
|
|
21
|
+
}
|
|
22
|
+
try:
|
|
23
|
+
return datetime.strptime(value, formats[len(value)])
|
|
24
|
+
except (KeyError, ValueError) as e:
|
|
25
|
+
raise ValueError(f"Invalid date format: {value}. Use YYYY-MM, YYYY-MM-DD, or YYYY-MM-DD HH:MM.") from e
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _validate_inputs(lat, lon, run_type, freq, interp_method):
    """
    Check the request arguments and raise ValueError on the first problem.

    Checks, in order: latitude within [-90, 90], longitude within
    [-180, 180], run_type, freq and interp_method being members of the
    VALID_* sets, and monthly frequency being combined with run_type 'final'.
    """
    latitude = float(lat)
    if not (-90 <= latitude <= 90):
        raise ValueError("lat must be between -90 and 90.")

    longitude = float(lon)
    if not (-180 <= longitude <= 180):
        raise ValueError("lon must be between -180 and 180.")

    membership_checks = (
        (run_type, VALID_RUN_TYPES, "run_type must be 'early', 'late', or 'final'."),
        (freq, VALID_FREQUENCIES, "freq must be 'hhr', 'daily', or 'monthly'."),
        (interp_method, VALID_INTERPOLATION_METHODS, "interp_method must be 'nearest', 'linear', or 'cubic'."),
    )
    for given, allowed, message in membership_checks:
        if given not in allowed:
            raise ValueError(message)

    if run_type != "final" and freq == "monthly":
        raise ValueError("monthly frequency only supports run_type='final'.")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _excel_dataframe(results):
|
|
44
|
+
df = pd.DataFrame(results)
|
|
45
|
+
preferred_order = [
|
|
46
|
+
"Start_Time",
|
|
47
|
+
"End_Time",
|
|
48
|
+
"Requested_Lat",
|
|
49
|
+
"Requested_Lon",
|
|
50
|
+
"Actual_Lat",
|
|
51
|
+
"Actual_Lon",
|
|
52
|
+
"Interpolation",
|
|
53
|
+
"IMERG_Version",
|
|
54
|
+
"Run_Type",
|
|
55
|
+
"Region_Type",
|
|
56
|
+
"Region_Name",
|
|
57
|
+
"Min_Lat",
|
|
58
|
+
"Min_Lon",
|
|
59
|
+
"Max_Lat",
|
|
60
|
+
"Max_Lon",
|
|
61
|
+
"Grid_Cells_Averaged",
|
|
62
|
+
]
|
|
63
|
+
precip_cols = [c for c in df.columns if c.startswith("Precipitation_")]
|
|
64
|
+
ordered_cols = [c for c in preferred_order + precip_cols if c in df.columns]
|
|
65
|
+
remaining_cols = [c for c in df.columns if c not in ordered_cols]
|
|
66
|
+
df = df[ordered_cols + remaining_cols]
|
|
67
|
+
return df.rename(columns={"Start_Time": "Start Time", "End_Time": "End Time"})
|
|
68
|
+
|
|
69
|
+
def get_precipitation(lat, lon, start_datetime, end_datetime, username, password,
                      run_type="early", freq="hhr", interp_method="nearest", out_dir=".",
                      progress_callback=None, selection_mode="point", bbox=None,
                      geometry=None, region_name=None):
    """
    Download IMERG data for a time period and save it to an Excel file.

    One granule is downloaded per time step (30 minutes, one day or one
    month, depending on ``freq``) into a temporary file, reduced to a single
    row, and the temporary file is removed again.

    Args:
        lat, lon: Coordinates of the point; validated against [-90, 90] and
            [-180, 180] in every selection mode.
        start_datetime, end_datetime: Period to cover, parsed by
            _parse_datetime().
        username, password: Credentials passed to EarthdataDownloader.
        run_type: 'early', 'late' or 'final'.
        freq: 'hhr', 'daily' or 'monthly'.
        interp_method: 'nearest', 'linear' or 'cubic'; only used when
            selection_mode is "point".
        out_dir: Directory for the Excel file; created if missing.
        progress_callback: Optional callable; called with 0 before the first
            step and with an integer percentage after each step.
        selection_mode: "point" extracts a single location; any other value
            averages over an area and requires ``bbox``.
        bbox: (min_lat, min_lon, max_lat, max_lon) for area downloads.
        geometry: Optional geometry passed to extract_area_average().
        region_name: Optional label used in the file name and passed to
            extract_area_average().

    Returns:
        Tuple ``(excel_path, results)`` where ``results`` is a list with one
        dict per successful step and "Start_Time"/"End_Time" as strings.

    Raises:
        ValueError: On invalid arguments, a missing bbox for area downloads,
            or an end before the start.
        RuntimeError: If no step could be downloaded.
    """
    if selection_mode == "point":
        _validate_inputs(lat, lon, run_type, freq, interp_method)
    else:
        # Interpolation does not apply to area averages, so a fixed valid
        # value is passed to the shared validator.
        _validate_inputs(lat, lon, run_type, freq, "nearest")
        if not bbox:
            raise ValueError("bbox is required for country and square-area downloads.")
    dt_start = _parse_datetime(start_datetime)
    dt_end = _parse_datetime(end_datetime)

    if dt_end < dt_start:
        raise ValueError("end_datetime must be after start_datetime")

    downloader = EarthdataDownloader(username, password)

    time_stamp_start = dt_start.strftime("%Y%m%d_%H%M")
    time_stamp_end = dt_end.strftime("%Y%m%d_%H%M")
    region_label = region_name or f"{lat}_{lon}"
    # Keep only characters that are safe in a file name.
    region_label = "".join(c if c.isalnum() or c in "._-" else "_" for c in str(region_label))
    excel_filename = f"IMERG_{run_type}_{freq}_{selection_mode}_{region_label}_{time_stamp_start}_to_{time_stamp_end}.xlsx"
    os.makedirs(out_dir, exist_ok=True)
    excel_path = os.path.join(out_dir, excel_filename)

    # Snap start time appropriately
    if freq == "hhr":
        minute = 0 if dt_start.minute < 30 else 30
        current_dt = dt_start.replace(minute=minute, second=0, microsecond=0)
    elif freq == "daily":
        current_dt = dt_start.replace(hour=0, minute=0, second=0, microsecond=0)
        # Extend the end to 23:59 so the last day is included in the loop.
        dt_end = dt_end.replace(hour=23, minute=59)
    elif freq == "monthly":
        current_dt = dt_start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
    # Count the steps up front so progress can be reported as a percentage.
    total_steps = 0
    temp_dt = current_dt
    while temp_dt <= dt_end:
        total_steps += 1
        if freq == "hhr": temp_dt += timedelta(minutes=30)
        elif freq == "daily": temp_dt += timedelta(days=1)
        elif freq == "monthly": temp_dt += relativedelta(months=1)

    results = []
    failures = []
    step_count = 0
    if progress_callback:
        progress_callback(0)

    while current_dt <= dt_end:
        step_count += 1

        # mkstemp returns an open descriptor; close it and reuse only the path.
        fd, temp_nc_path = tempfile.mkstemp(suffix=".nc4")
        os.close(fd)

        try:
            _, version_used = downloader.download_granule(lat, lon, current_dt, temp_nc_path, run_type, freq, bbox=bbox)
            if selection_mode == "point":
                data_dict = extract_precipitation(temp_nc_path, lat, lon, method=interp_method, freq=freq, current_dt=current_dt)
            else:
                data_dict = extract_area_average(
                    temp_nc_path,
                    bbox=bbox,
                    freq=freq,
                    current_dt=current_dt,
                    geometry=geometry,
                    region_name=region_name,
                    region_type=selection_mode,
                )
            data_dict["IMERG_Version"] = version_used
            data_dict["Run_Type"] = run_type
            results.append(data_dict)
        except DownloadError as e:
            # A failed download skips this step; other exceptions propagate.
            failures.append({"datetime": current_dt.isoformat(), "error": str(e)})
            print(f"  -> Warning: {e}")
        finally:
            if os.path.exists(temp_nc_path):
                os.remove(temp_nc_path)
        if progress_callback:
            progress_callback(int((step_count / total_steps) * 100))

        if freq == "hhr": current_dt += timedelta(minutes=30)
        elif freq == "daily": current_dt += timedelta(days=1)
        elif freq == "monthly": current_dt += relativedelta(months=1)

    if not results:
        # Only the first three failures are included in the message.
        details = "; ".join(f"{f['datetime']}: {f['error']}" for f in failures[:3])
        raise RuntimeError(f"No data could be successfully downloaded. {details}")

    df = _excel_dataframe(results)
    df.to_excel(excel_path, index=False)

    # JSON-friendly results
    serializable_results = []
    for r in results:
        entry = r.copy()
        for k in ["Start_Time", "End_Time"]:
            if hasattr(entry[k], 'isoformat'): entry[k] = entry[k].isoformat()
            else: entry[k] = str(entry[k])
        serializable_results.append(entry)

    return excel_path, serializable_results
|
imergpy/downloader.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from requests.adapters import HTTPAdapter
|
|
3
|
+
from requests.auth import HTTPBasicAuth
|
|
4
|
+
from urllib3.util.retry import Retry
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DownloadError(Exception):
    """Raised when an IMERG granule cannot be downloaded.

    EarthdataDownloader.download_granule raises it for network failures,
    for unexpected HTTP status codes, and when the granule is available
    under none of the attempted product versions.
    """
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class EarthdataDownloader:
    """Download spatially subsetted IMERG granules from the GES DISC OTF service.

    A single ``requests`` session with HTTP basic authentication and automatic
    retries (on connection errors and on status 429/500/502/503/504) is shared
    by all downloads of one instance.
    """

    def __init__(self, username, password, timeout=60, retries=3):
        """
        Args:
            username, password: NASA Earthdata credentials (both required).
            timeout: Timeout in seconds passed to every request.
            retries: Maximum number of retries per request.

        Raises:
            ValueError: If the username or the password is empty.
        """
        if not username or not password:
            raise ValueError("NASA Earthdata username and password are required.")

        self.timeout = timeout
        self.session = requests.Session()
        self.session.auth = HTTPBasicAuth(username, password)
        retry = Retry(
            total=retries,
            connect=retries,
            read=retries,
            status=retries,
            backoff_factor=0.5,
            status_forcelist=(429, 500, 502, 503, 504),
            allowed_methods=("GET",),
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount("https://", adapter)
        self.session.mount("http://", adapter)

    def _build_url(self, lat, lon, dt, version, run_type="early", freq="hhr", bbox=None):
        """
        Build the OTF subsetting URL for one granule.

        Args:
            lat, lon: Point of interest; used only when ``bbox`` is not given,
                in which case a box of +/-0.1 degrees around it is requested.
                Any value accepted by ``float()`` is allowed.
            dt: Datetime identifying the granule.
            version: Product version string, e.g. "V07B".
            run_type: 'early', 'late' or 'final'.
            freq: 'hhr', 'daily' or 'monthly'.
            bbox: Optional (min_lat, min_lon, max_lat, max_lon).

        Raises:
            ValueError: For an unsupported run_type/freq combination.
        """
        if bbox:
            min_lat, min_lon, max_lat, max_lon = bbox
            min_lat, max_lat = max(-90.0, float(min_lat)), min(90.0, float(max_lat))
            min_lon, max_lon = max(-180.0, float(min_lon)), min(180.0, float(max_lon))
        else:
            # Coerce like the bbox branch does, so numeric strings work too.
            lat, lon = float(lat), float(lon)
            min_lat, max_lat = max(-90.0, lat - 0.1), min(90.0, lat + 0.1)
            min_lon, max_lon = max(-180.0, lon - 0.1), min(180.0, lon + 0.1)
        bbox_param = f"{min_lat},{min_lon},{max_lat},{max_lon}".replace(",", "%2C")

        year = dt.strftime("%Y")
        month = dt.strftime("%m")
        doy = dt.strftime("%j")
        date_str = dt.strftime("%Y%m%d")

        if freq == "hhr":
            # Imported here because only half-hourly names need the time table.
            from .config import get_time_string

            time_str = get_time_string(dt)
            ext = "HDF5"
            if run_type == "early":
                shortname = "GPM_3IMERGHHE"
                prefix = f"3B-HHR-E.MS.MRG.3IMERG.{date_str}-{time_str}"
            elif run_type == "late":
                shortname = "GPM_3IMERGHHL"
                prefix = f"3B-HHR-L.MS.MRG.3IMERG.{date_str}-{time_str}"
            elif run_type == "final":
                shortname = "GPM_3IMERGHH"
                prefix = f"3B-HHR.MS.MRG.3IMERG.{date_str}-{time_str}"
            else:
                raise ValueError("run_type must be 'early', 'late', or 'final'.")
            # Half-hourly granules are stored per year and day-of-year.
            directory = f"{year}/{doy}"
        elif freq == "daily":
            time_str = "S000000-E235959"
            ext = "nc4"
            if run_type == "early":
                shortname = "GPM_3IMERGDE"
                prefix = f"3B-DAY-E.MS.MRG.3IMERG.{date_str}-{time_str}"
            elif run_type == "late":
                shortname = "GPM_3IMERGDL"
                prefix = f"3B-DAY-L.MS.MRG.3IMERG.{date_str}-{time_str}"
            elif run_type == "final":
                shortname = "GPM_3IMERGDF"
                prefix = f"3B-DAY.MS.MRG.3IMERG.{date_str}-{time_str}"
            else:
                raise ValueError("run_type must be 'early', 'late', or 'final'.")
            # Daily granules are stored per year and month.
            directory = f"{year}/{month}"
        elif freq == "monthly":
            time_str = f"S000000-E235959.{month}"
            ext = "HDF5"
            date_str = dt.strftime("%Y%m01")  # Monthlies start on 1st
            if run_type == "final":
                shortname = "GPM_3IMERGM"
                prefix = f"3B-MO.MS.MRG.3IMERG.{date_str}-{time_str}"
            else:
                raise ValueError("Monthly frequency usually only supports 'final' run_type.")
            # Monthly granules are stored per year.
            directory = year
        else:
            raise ValueError(f"Unsupported frequency: {freq}")

        filename = f"/data/GPM_L3/{shortname}.07/{directory}/{prefix}.{version}.{ext}"
        filename_encoded = filename.replace("/", "%2F")
        label = f"{prefix}.{version}.{ext}.SUB.nc4"

        return (
            f"https://gpm1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?"
            f"FILENAME={filename_encoded}&SERVICE=L34RS_GPM&LABEL={label}&BBOX={bbox_param}"
            f"&VERSION=1.02&VARIABLES=precipitation&SHORTNAME={shortname}&DATASET_VERSION=07&FORMAT=nc4%2F"
        )

    def download_granule(self, lat, lon, dt, out_path, run_type="early", freq="hhr", bbox=None):
        """
        Download one subsetted granule to ``out_path``.

        Versions V07C, V07B and V07A are tried in that order; a 404 response
        moves on to the next version.

        Returns:
            Tuple ``(True, version)`` with the version that was downloaded.

        Raises:
            ValueError: For an unsupported run_type/freq combination.
            DownloadError: On a network error, on a status other than 200 or
                404, or when no version of the granule is available.
        """
        for version in ["V07C", "V07B", "V07A"]:
            url = self._build_url(lat, lon, dt, version, run_type, freq, bbox=bbox)

            try:
                response = self.session.get(url, stream=True, timeout=self.timeout)
            except requests.RequestException as e:
                raise DownloadError(f"Network error while downloading {dt}: {e}") from e

            # With stream=True the connection stays checked out until the
            # response is closed, so close it on every path.
            with response:
                if response.status_code == 404:
                    continue
                if response.status_code != 200:
                    raise DownloadError(f"Failed to download data. Status: {response.status_code}. Response: {response.text}")

                try:
                    with open(out_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
                except requests.RequestException as e:
                    # A connection dropped mid-transfer is a download failure too.
                    raise DownloadError(f"Network error while downloading {dt}: {e}") from e
            return True, version

        raise DownloadError(f"File not available (tried V07C, V07B, V07A) for {dt} | Run: {run_type} | Freq: {freq}")
|
imergpy/plotter.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
import matplotlib.pyplot as plt
|
|
3
|
+
import os
|
|
4
|
+
from .analyzer import _precip_column, _time_column
|
|
5
|
+
|
|
6
|
+
def plot_from_excel(excel_path, save_png=True):
    """
    Reads the IMERG data from an Excel file and plots a time series.

    The title shows the requested latitude/longitude for point downloads and
    the region name for area downloads (which have no 'Requested_Lat' /
    'Requested_Lon' columns).

    Args:
        excel_path (str): Path to the generated Excel file.
        save_png (bool): If True, saves the plot as a PNG image alongside the
            Excel file; otherwise the plot is shown interactively.

    Raises:
        Exception: If reading, plotting or saving fails; the original error
            is attached as the cause.
    """
    try:
        # Read the Excel file
        df = pd.read_excel(excel_path)

        # The exported workbook uses "Start Time"/"End Time" headers; map them
        # back to the internal names expected by the column helpers.
        df = df.rename(columns={"Start Time": "Start_Time", "End Time": "End_Time"})

        time_col = _time_column(df)
        precip_col = _precip_column(df)
        df[time_col] = pd.to_datetime(df[time_col])

        df = df.sort_values(time_col)
        ylabel = precip_col.replace("Precipitation_", "").replace("_", " ")

        if 'Requested_Lat' in df.columns and 'Requested_Lon' in df.columns:
            lat = df['Requested_Lat'].iloc[0]
            lon = df['Requested_Lon'].iloc[0]
            title = f'IMERG Precipitation\nLat: {lat}, Lon: {lon}'
        elif 'Region_Name' in df.columns:
            title = f"IMERG Precipitation\nRegion: {df['Region_Name'].iloc[0]}"
        else:
            title = 'IMERG Precipitation'

        plt.figure(figsize=(10, 6))
        try:
            plt.plot(df[time_col], df[precip_col], marker='o', linestyle='-', color='b', label='Precipitation')

            plt.title(title)
            plt.xlabel('Time (UTC)')
            plt.ylabel(f'Precipitation ({ylabel})')
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.xticks(rotation=45)
            plt.tight_layout()

            # Save or display
            if save_png:
                png_path = os.path.splitext(excel_path)[0] + '.png'
                plt.savefig(png_path, dpi=300)
                print(f"Plot saved successfully to: {png_path}")
            else:
                plt.show()
        finally:
            # Close the figure even when plotting or saving fails, so failed
            # calls do not accumulate open figures.
            plt.close()

    except Exception as e:
        raise Exception(f"Failed to plot time series: {str(e)}") from e
|
imergpy/processor.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import xarray as xr
|
|
2
|
+
import pandas as pd
|
|
3
|
+
import calendar
|
|
4
|
+
import numpy as np
|
|
5
|
+
from datetime import timedelta
|
|
6
|
+
from dateutil.relativedelta import relativedelta
|
|
7
|
+
from matplotlib.path import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# Accepted values for the `method` argument of extract_precipitation().
VALID_INTERPOLATION_METHODS = {"nearest", "linear", "cubic"}
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _time_bounds(time_val, freq, current_dt):
|
|
14
|
+
t_start = time_val[0] if getattr(time_val, 'ndim', 0) > 0 else time_val
|
|
15
|
+
if hasattr(t_start, 'tolist'):
|
|
16
|
+
t_start = pd.to_datetime(t_start)
|
|
17
|
+
|
|
18
|
+
if freq == "hhr":
|
|
19
|
+
return t_start, t_start + timedelta(minutes=30), "Precipitation_mm_per_half_hour", 0.5
|
|
20
|
+
if freq == "daily":
|
|
21
|
+
return t_start, t_start + timedelta(days=1), "Precipitation_mm_per_day", 1.0
|
|
22
|
+
if freq == "monthly":
|
|
23
|
+
days_in_month = calendar.monthrange(current_dt.year, current_dt.month)[1]
|
|
24
|
+
return t_start, t_start + relativedelta(months=1), "Precipitation_mm_per_month", 24 * days_in_month
|
|
25
|
+
raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _iter_polygons(geometry):
|
|
29
|
+
if not geometry:
|
|
30
|
+
return []
|
|
31
|
+
if geometry.get("type") == "Polygon":
|
|
32
|
+
return [geometry["coordinates"]]
|
|
33
|
+
if geometry.get("type") == "MultiPolygon":
|
|
34
|
+
return geometry["coordinates"]
|
|
35
|
+
return []
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _geometry_mask(lats, lons, geometry):
    """
    Return a boolean (lat, lon) grid marking cells inside ``geometry``.

    Each grid point (lon, lat) is tested against every polygon returned by
    _iter_polygons(): a point counts as inside a polygon when it lies within
    the first ring and within none of the remaining rings (holes). The
    result is the union over all polygons.
    """
    grid_lon, grid_lat = np.meshgrid(lons, lats)
    coords = np.column_stack([grid_lon.ravel(), grid_lat.ravel()])
    inside_any = np.zeros(coords.shape[0], dtype=bool)

    for rings in _iter_polygons(geometry):
        if not rings:
            continue
        inside = Path(rings[0]).contains_points(coords)
        for hole in rings[1:]:
            # Points that fall into a hole are removed again.
            inside = inside & ~Path(hole).contains_points(coords)
        inside_any = inside_any | inside

    return inside_any.reshape(grid_lat.shape)
|
|
53
|
+
|
|
54
|
+
def extract_precipitation(nc_path, target_lat, target_lon, method="nearest", freq="hhr", current_dt=None):
    """
    Read a downloaded NetCDF file and extract precipitation at one point.

    Args:
        nc_path: Path to the NetCDF file.
        target_lat, target_lon: Requested coordinates.
        method: 'nearest' selects the closest grid cell; 'linear' and 'cubic'
            interpolate to the requested coordinates.
        freq: 'hhr', 'daily' or 'monthly'; determines the output column name
            and the scale factor applied to the value (see _time_bounds).
        current_dt: Datetime of the requested step; required for 'monthly'.

    Returns:
        dict with the requested and actual coordinates, the interpolation
        method, 'Start_Time', 'End_Time' and the scaled precipitation value
        under a frequency-specific column name.

    Raises:
        ValueError: For an invalid ``method`` or ``freq``, or a missing
            ``current_dt`` with monthly frequency.
        Exception: If the file cannot be read or processed; the original
            error is attached as the cause.
    """
    if method not in VALID_INTERPOLATION_METHODS:
        raise ValueError("method must be 'nearest', 'linear', or 'cubic'.")
    if freq not in {"hhr", "daily", "monthly"}:
        raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")
    if freq == "monthly" and current_dt is None:
        raise ValueError("current_dt is required when freq='monthly'.")

    try:
        # The context manager closes the dataset on every exit path.
        with xr.open_dataset(nc_path) as ds:
            if method == "nearest":
                point_data = ds.sel(lat=target_lat, lon=target_lon, method="nearest")
                actual_lat = float(point_data['lat'].values.item())
                actual_lon = float(point_data['lon'].values.item())
            else:
                point_data = ds.interp(lat=target_lat, lon=target_lon, method=method)
                actual_lat = target_lat
                actual_lon = target_lon

            precip_value = point_data['precipitation'].values
            time_val = point_data['time'].values

            t_start, t_end, col_name, scale = _time_bounds(time_val, freq, current_dt)
            val = float(precip_value.item() if hasattr(precip_value, 'item') else precip_value) * scale
            return {
                "Requested_Lat": target_lat,
                "Requested_Lon": target_lon,
                "Actual_Lat": actual_lat,
                "Actual_Lon": actual_lon,
                "Interpolation": method,
                "Start_Time": t_start,
                "End_Time": t_end,
                col_name: val
            }
    except Exception as e:
        raise Exception(f"Failed to process NetCDF: {str(e)}") from e
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def extract_area_average(nc_path, bbox, freq="hhr", current_dt=None, geometry=None, region_name=None, region_type="area"):
    """
    Read a downloaded NetCDF file and average precipitation over a region.

    The result is the unweighted mean of all finite grid cells. When
    ``geometry`` is given and covers at least one grid point, only cells
    inside it are used; when it covers none, all finite cells of the file
    are averaged instead.

    Args:
        nc_path: Path to the NetCDF file.
        bbox: (min_lat, min_lon, max_lat, max_lon); copied into the result.
        freq: 'hhr', 'daily' or 'monthly'; determines the output column name
            and the scale factor applied to the mean (see _time_bounds).
        current_dt: Datetime of the requested step; required for 'monthly'.
        geometry: Optional Polygon/MultiPolygon mapping limiting the cells.
        region_name: Label for the result; defaults to ``region_type``.
        region_type: Kind of region, stored as 'Region_Type'.

    Returns:
        dict with the region description, the bounding box, the number of
        averaged grid cells, 'Start_Time', 'End_Time' and the scaled mean
        under a frequency-specific column name.

    Raises:
        ValueError: For an invalid ``freq`` or a missing ``current_dt`` with
            monthly frequency.
        Exception: If the file cannot be read or processed, including when no
            usable grid cell exists; the original error is attached as the
            cause.
    """
    if freq not in {"hhr", "daily", "monthly"}:
        raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")
    if freq == "monthly" and current_dt is None:
        raise ValueError("current_dt is required when freq='monthly'.")

    try:
        # The context manager closes the dataset on every exit path.
        with xr.open_dataset(nc_path) as ds:
            da = ds["precipitation"]
            if "time" in da.dims:
                da = da.isel(time=0)
            if "lat" in da.dims and "lon" in da.dims:
                da = da.transpose("lat", "lon")

            lats = ds["lat"].values
            lons = ds["lon"].values
            values = np.asarray(da.values, dtype=float)
            # Drop any remaining leading dimensions by taking their first entry.
            while values.ndim > 2:
                values = values[0]
            if values.shape != (len(lats), len(lons)):
                if values.T.shape == (len(lats), len(lons)):
                    values = values.T
                else:
                    raise ValueError(
                        f"Unexpected precipitation grid shape {values.shape}; expected {(len(lats), len(lons))}."
                    )

            mask = np.isfinite(values)
            if geometry:
                geom_mask = _geometry_mask(lats, lons, geometry)
                # A geometry covering no grid point is ignored rather than
                # producing an empty selection.
                if geom_mask.any():
                    mask &= geom_mask

            if not mask.any():
                raise ValueError("No IMERG grid cells found inside the selected region.")

            mean_rate = float(np.nanmean(np.where(mask, values, np.nan)))
            t_start, t_end, col_name, scale = _time_bounds(ds["time"].values, freq, current_dt)
            min_lat, min_lon, max_lat, max_lon = bbox

            return {
                "Region_Type": region_type,
                "Region_Name": region_name or region_type,
                "Min_Lat": min_lat,
                "Min_Lon": min_lon,
                "Max_Lat": max_lat,
                "Max_Lon": max_lon,
                "Grid_Cells_Averaged": int(mask.sum()),
                "Start_Time": t_start,
                "End_Time": t_end,
                col_name: mean_rate * scale,
            }
    except Exception as e:
        raise Exception(f"Failed to process area NetCDF: {str(e)}") from e
|