imergpy 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imergpy/__init__.py ADDED
@@ -0,0 +1,12 @@
1
+ # imergpy package
2
+ from .core import get_precipitation
3
+ from .plotter import plot_from_excel
4
+ from .analyzer import add_accumulation, resample_data, calculate_statistics
5
+
6
+ __all__ = [
7
+ "get_precipitation",
8
+ "plot_from_excel",
9
+ "add_accumulation",
10
+ "resample_data",
11
+ "calculate_statistics"
12
+ ]
imergpy/analyzer.py ADDED
@@ -0,0 +1,87 @@
1
+ import pandas as pd
2
+
3
+
4
# Column names that may hold the precipitation value, in lookup order.
PRECIP_COLUMNS = [
    f"Precipitation_{suffix}"
    for suffix in ("mm_per_half_hour", "mm_per_day", "mm_per_month", "mm", "mm_hr")
]
11
+
12
+
13
def _find_column(df, candidates, label):
    """Return the first name in ``candidates`` that is a column of ``df``.

    Args:
        df: DataFrame whose columns are searched.
        candidates: Column names to try, in priority order.
        label: Human-readable description used in the error message.

    Raises:
        ValueError: If none of the candidates is present.
    """
    match = next((name for name in candidates if name in df.columns), None)
    if match is None:
        raise ValueError(f"Could not find {label}. Expected one of: {', '.join(candidates)}")
    return match
18
+
19
+
20
def _time_column(df):
    """Return the name of the timestamp column in ``df``.

    ``"Start Time"`` (with a space) is accepted next to ``"Start_Time"``
    because ``core._excel_dataframe`` renames the column to the spaced
    spelling before the workbook is written, so frames read back from a
    generated Excel file carry that header.

    Raises:
        ValueError: If none of the known time column names is present.
    """
    return _find_column(df, ["Start_Time", "Start Time", "Time"], "time column")
22
+
23
+
24
def _precip_column(df):
    """Return the first ``PRECIP_COLUMNS`` entry found in ``df`` (ValueError if none)."""
    return _find_column(df, candidates=PRECIP_COLUMNS, label="precipitation column")
26
+
27
def add_accumulation(df):
    """Return a copy of ``df`` with per-interval and running precipitation totals.

    Two columns are added:

    * ``Absolute_Precip_mm`` - the detected precipitation column, halved when
      that column is ``Precipitation_mm_hr`` and copied unchanged otherwise.
    * ``Cumulative_Precip_mm`` - cumulative sum of ``Absolute_Precip_mm`` in
      row order.

    Raises:
        ValueError: If ``df`` has no recognised precipitation column.
    """
    precip_col = _precip_column(df)
    result = df.copy()

    per_interval = result[precip_col]
    if precip_col == "Precipitation_mm_hr":
        # Only this column is scaled: the value is multiplied by 0.5.
        per_interval = per_interval * 0.5

    result["Absolute_Precip_mm"] = per_interval
    result["Cumulative_Precip_mm"] = result["Absolute_Precip_mm"].cumsum()
    return result
42
+
43
def resample_data(df, freq='D'):
    """Sum ``Absolute_Precip_mm`` over resampling bins of width ``freq``.

    Args:
        df: DataFrame with a time column and either ``Absolute_Precip_mm`` or
            a recognised precipitation column (the former is derived through
            ``add_accumulation`` when missing).
        freq: Resampling rule handed to ``DataFrame.resample``, e.g. 'D' for
            daily or 'M' for monthly totals.

    Returns:
        A new DataFrame with the time column and ``Total_Precip_mm``; the
        input frame is not modified.
    """
    if 'Absolute_Precip_mm' in df.columns:
        frame = df.copy()
    else:
        frame = add_accumulation(df)

    time_col = _time_column(frame)
    frame[time_col] = pd.to_datetime(frame[time_col])

    totals = (
        frame.set_index(time_col)[['Absolute_Precip_mm']]
        .resample(freq)
        .sum()
        .rename(columns={'Absolute_Precip_mm': 'Total_Precip_mm'})
    )
    return totals.reset_index()
65
+
66
def calculate_statistics(df):
    """Summarise rainfall totals and daily threshold counts for ``df``.

    ``Absolute_Precip_mm`` is derived with ``add_accumulation`` when absent,
    and daily totals come from ``resample_data(df, freq='D')``.

    Returns:
        dict with the overall total, the largest single-interval and daily
        values, the number of days analysed, and the number of days below
        1 mm, at or above 1 mm, above 25 mm and above 50 mm.
    """
    if 'Absolute_Precip_mm' not in df.columns:
        df = add_accumulation(df)

    interval_mm = df['Absolute_Precip_mm']
    daily_mm = resample_data(df, freq='D')['Total_Precip_mm']

    def _days(flags):
        # Number of daily totals satisfying the boolean condition.
        return int(flags.sum())

    return {
        "Total_Rainfall_mm": float(interval_mm.sum()),
        "Max_Interval_Precip_mm": float(interval_mm.max()),
        "Max_Daily_Rainfall_mm": float(daily_mm.max()),
        "Total_Days_Analyzed": int(len(daily_mm)),
        "Dry_Days_(<1mm)": _days(daily_mm < 1.0),
        "Wet_Days_(>=1mm)": _days(daily_mm >= 1.0),
        "Heavy_Rain_Days_(>25mm)": _days(daily_mm > 25.0),
        "Extreme_Rain_Days_(>50mm)": _days(daily_mm > 50.0),
    }
imergpy/cli.py ADDED
@@ -0,0 +1,13 @@
1
+ import sys
2
+ from .server import start_server
3
+
4
def main():
    """CLI entry point: call ``start_server`` and exit with status 0 on Ctrl+C."""
    try:
        start_server()
    except KeyboardInterrupt:
        # An interrupt is treated as a normal shutdown, not an error.
        print("\nShutting down imergpy interface...")
        sys.exit(0)


if __name__ == "__main__":
    main()
imergpy/config.py ADDED
@@ -0,0 +1,27 @@
1
# The 48 half-hourly time strings used by GES DISC OTF, in chronological
# order: S<start HHMMSS>-E<end HHMMSS>.<minutes elapsed since 00:00>.
IMERG_TIMES = [
    f"S{hour:02d}{minute:02d}00-E{hour:02d}{minute + 29:02d}59.{hour * 60 + minute:04d}"
    for hour in range(24)
    for minute in (0, 30)
]
16
+
17
def get_time_string(dt_obj):
    """Return the ``IMERG_TIMES`` entry for the half hour containing ``dt_obj``.

    Args:
        dt_obj: Object exposing ``hour`` and ``minute`` (e.g. a ``datetime``).
    """
    # Two slots per hour: minutes 0-29 select the first, 30-59 the second.
    slot = 2 * dt_obj.hour + dt_obj.minute // 30
    return IMERG_TIMES[slot]
imergpy/core.py ADDED
@@ -0,0 +1,174 @@
1
+ import os
2
+ import tempfile
3
+ import pandas as pd
4
+ from datetime import datetime, timedelta
5
+ from dateutil.relativedelta import relativedelta
6
+ from .downloader import DownloadError, EarthdataDownloader
7
+ from .processor import extract_area_average, extract_precipitation
8
+
9
+
10
# Accepted values for the ``run_type``, ``freq`` and ``interp_method``
# arguments; ``_validate_inputs`` rejects anything else.
VALID_RUN_TYPES = {
    "early",
    "late",
    "final",
}
VALID_FREQUENCIES = {
    "hhr",
    "daily",
    "monthly",
}
VALID_INTERPOLATION_METHODS = {
    "nearest",
    "linear",
    "cubic",
}
13
+
14
+
15
def _parse_datetime(value):
    """Convert a user-supplied date/time into a ``datetime``.

    Accepted inputs:

    * a ``datetime`` instance, returned unchanged;
    * anything whose ``str()`` is ``YYYY-MM``, ``YYYY-MM-DD``,
      ``YYYY-MM-DD HH:MM`` or ``YYYY-MM-DD HH:MM:SS``. A ``T`` separator is
      treated as a space and surrounding whitespace is ignored.

    Raises:
        ValueError: If the text matches none of the supported layouts.
    """
    if isinstance(value, datetime):
        return value

    text = str(value).strip().replace('T', ' ')
    # The layout is selected by text length; each supported layout has a
    # distinct length, so the lookup is unambiguous.
    formats = {
        7: "%Y-%m",
        10: "%Y-%m-%d",
        16: "%Y-%m-%d %H:%M",
        19: "%Y-%m-%d %H:%M:%S",
    }
    try:
        return datetime.strptime(text, formats[len(text)])
    except (KeyError, ValueError) as e:
        raise ValueError(
            f"Invalid date format: {text}. "
            "Use YYYY-MM, YYYY-MM-DD, YYYY-MM-DD HH:MM, or YYYY-MM-DD HH:MM:SS."
        ) from e
26
+
27
+
28
def _validate_inputs(lat, lon, run_type, freq, interp_method):
    """Check the request parameters, raising ``ValueError`` on the first failure.

    Checks run in this order: latitude range, longitude range, run type,
    frequency, interpolation method, and finally that monthly data is only
    requested for the 'final' run.
    """
    def _require(ok, message):
        if not ok:
            raise ValueError(message)

    _require(-90 <= float(lat) <= 90, "lat must be between -90 and 90.")
    _require(-180 <= float(lon) <= 180, "lon must be between -180 and 180.")
    _require(run_type in VALID_RUN_TYPES, "run_type must be 'early', 'late', or 'final'.")
    _require(freq in VALID_FREQUENCIES, "freq must be 'hhr', 'daily', or 'monthly'.")
    _require(
        interp_method in VALID_INTERPOLATION_METHODS,
        "interp_method must be 'nearest', 'linear', or 'cubic'.",
    )
    _require(
        not (freq == "monthly" and run_type != "final"),
        "monthly frequency only supports run_type='final'.",
    )
41
+
42
+
43
def _excel_dataframe(results):
    """Build the DataFrame written to Excel from the per-granule result dicts.

    Columns are ordered as: the known metadata columns that are present (in
    the fixed order below), then every ``Precipitation_*`` column, then any
    other column in its original order. ``Start_Time`` and ``End_Time`` are
    finally renamed to ``Start Time`` and ``End Time``.
    """
    frame = pd.DataFrame(results)
    leading = (
        "Start_Time", "End_Time",
        "Requested_Lat", "Requested_Lon",
        "Actual_Lat", "Actual_Lon",
        "Interpolation", "IMERG_Version", "Run_Type",
        "Region_Type", "Region_Name",
        "Min_Lat", "Min_Lon", "Max_Lat", "Max_Lon",
        "Grid_Cells_Averaged",
    )
    precipitation = [name for name in frame.columns if name.startswith("Precipitation_")]
    front = [name for name in (*leading, *precipitation) if name in frame.columns]
    rest = [name for name in frame.columns if name not in front]
    return frame[front + rest].rename(
        columns={"Start_Time": "Start Time", "End_Time": "End Time"}
    )
68
+
69
def _advance_step(dt, freq):
    """Return ``dt`` moved forward by one granule of frequency ``freq``."""
    if freq == "hhr":
        return dt + timedelta(minutes=30)
    if freq == "daily":
        return dt + timedelta(days=1)
    return dt + relativedelta(months=1)


def get_precipitation(lat, lon, start_datetime, end_datetime, username, password,
                      run_type="early", freq="hhr", interp_method="nearest", out_dir=".",
                      progress_callback=None, selection_mode="point", bbox=None,
                      geometry=None, region_name=None):
    """Download IMERG data for a time range and save the values to Excel.

    One granule is requested per time step (half hour, day or month). Each
    granule is written to a temporary file, reduced to a single value and the
    temporary file is removed again.

    Args:
        lat, lon: Requested point; also used for the default file label.
        start_datetime, end_datetime: Range limits, parsed by ``_parse_datetime``.
        username, password: Credentials handed to ``EarthdataDownloader``.
        run_type: 'early', 'late' or 'final'.
        freq: 'hhr', 'daily' or 'monthly'.
        interp_method: Point extraction method; only used when
            ``selection_mode`` is 'point'.
        out_dir: Directory for the workbook (created if missing).
        progress_callback: Optional callable receiving an integer percentage,
            first 0 and then once after every step.
        selection_mode: 'point' extracts a single location; any other value
            averages over ``bbox`` (and ``geometry`` if given).
        bbox: ``(min_lat, min_lon, max_lat, max_lon)``; required for
            non-point modes.
        geometry: Optional geometry forwarded to ``extract_area_average``.
        region_name: Optional label used in the file name and area results.

    Returns:
        ``(excel_path, results)`` where ``results`` is the list of per-step
        dicts with ``Start_Time`` / ``End_Time`` converted to strings.

    Raises:
        ValueError: Invalid parameters, missing bbox, or end before start.
        RuntimeError: If no step could be downloaded.

    Only ``DownloadError`` is tolerated per step (recorded and printed as a
    warning); any other exception aborts the run.
    """
    is_point = selection_mode == "point"
    if is_point:
        _validate_inputs(lat, lon, run_type, freq, interp_method)
    else:
        # Area modes do not use interp_method, so a known-good value is
        # passed to validate the remaining parameters.
        _validate_inputs(lat, lon, run_type, freq, "nearest")
        if not bbox:
            raise ValueError("bbox is required for country and square-area downloads.")

    dt_start = _parse_datetime(start_datetime)
    dt_end = _parse_datetime(end_datetime)
    if dt_end < dt_start:
        raise ValueError("end_datetime must be after start_datetime")

    downloader = EarthdataDownloader(username, password)

    # The file name uses the range as requested, before any snapping below.
    time_stamp_start = dt_start.strftime("%Y%m%d_%H%M")
    time_stamp_end = dt_end.strftime("%Y%m%d_%H%M")
    region_label = region_name or f"{lat}_{lon}"
    region_label = "".join(c if c.isalnum() or c in "._-" else "_" for c in str(region_label))
    excel_filename = (
        f"IMERG_{run_type}_{freq}_{selection_mode}_{region_label}_"
        f"{time_stamp_start}_to_{time_stamp_end}.xlsx"
    )
    os.makedirs(out_dir, exist_ok=True)
    excel_path = os.path.join(out_dir, excel_filename)

    # Snap the first step to the start of its half hour / day / month.
    if freq == "hhr":
        minute = 0 if dt_start.minute < 30 else 30
        current_dt = dt_start.replace(minute=minute, second=0, microsecond=0)
    elif freq == "daily":
        current_dt = dt_start.replace(hour=0, minute=0, second=0, microsecond=0)
        # Push the end to late evening so the final day is always included.
        dt_end = dt_end.replace(hour=23, minute=59)
    elif freq == "monthly":
        current_dt = dt_start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    # Build the schedule once; its length drives the progress percentage.
    steps = []
    while current_dt <= dt_end:
        steps.append(current_dt)
        current_dt = _advance_step(current_dt, freq)
    total_steps = len(steps)

    results = []
    failures = []
    if progress_callback:
        progress_callback(0)

    try:
        for step_count, current_dt in enumerate(steps, start=1):
            fd, temp_nc_path = tempfile.mkstemp(suffix=".nc4")
            os.close(fd)

            try:
                _, version_used = downloader.download_granule(
                    lat, lon, current_dt, temp_nc_path, run_type, freq, bbox=bbox
                )
                if is_point:
                    data_dict = extract_precipitation(
                        temp_nc_path, lat, lon,
                        method=interp_method, freq=freq, current_dt=current_dt,
                    )
                else:
                    data_dict = extract_area_average(
                        temp_nc_path,
                        bbox=bbox,
                        freq=freq,
                        current_dt=current_dt,
                        geometry=geometry,
                        region_name=region_name,
                        region_type=selection_mode,
                    )
                data_dict["IMERG_Version"] = version_used
                data_dict["Run_Type"] = run_type
                results.append(data_dict)
            except DownloadError as e:
                failures.append({"datetime": current_dt.isoformat(), "error": str(e)})
                print(f" -> Warning: {e}")
            finally:
                if os.path.exists(temp_nc_path):
                    os.remove(temp_nc_path)
                if progress_callback:
                    progress_callback(int((step_count / total_steps) * 100))
    finally:
        # Release the HTTP connections held by the downloader's session.
        downloader.session.close()

    if not results:
        details = "; ".join(f"{f['datetime']}: {f['error']}" for f in failures[:3])
        raise RuntimeError(f"No data could be successfully downloaded. {details}")

    df = _excel_dataframe(results)
    df.to_excel(excel_path, index=False)

    # JSON-friendly copies: timestamps become strings.
    serializable_results = []
    for record in results:
        entry = record.copy()
        for key in ("Start_Time", "End_Time"):
            stamp = entry[key]
            entry[key] = stamp.isoformat() if hasattr(stamp, 'isoformat') else str(stamp)
        serializable_results.append(entry)

    return excel_path, serializable_results
imergpy/downloader.py ADDED
@@ -0,0 +1,128 @@
1
+ import requests
2
+ from requests.adapters import HTTPAdapter
3
+ from requests.auth import HTTPBasicAuth
4
+ from urllib3.util.retry import Retry
5
+
6
+
7
class DownloadError(Exception):
    """Signal that an IMERG granule could not be downloaded."""
9
+
10
+
11
class EarthdataDownloader:
    """Download subsetted IMERG granules through the GES DISC OTF HTTP service.

    A single ``requests.Session`` carries the credentials and a retry policy
    for transient failures (HTTP 429/5xx, connect and read errors).
    """

    # Product versions tried, in order, by ``download_granule``.
    _VERSIONS = ("V07C", "V07B", "V07A")

    # (shortname, file-name stem) for every supported freq / run_type pair.
    _PRODUCTS = {
        "hhr": {
            "early": ("GPM_3IMERGHHE", "3B-HHR-E"),
            "late": ("GPM_3IMERGHHL", "3B-HHR-L"),
            "final": ("GPM_3IMERGHH", "3B-HHR"),
        },
        "daily": {
            "early": ("GPM_3IMERGDE", "3B-DAY-E"),
            "late": ("GPM_3IMERGDL", "3B-DAY-L"),
            "final": ("GPM_3IMERGDF", "3B-DAY"),
        },
        "monthly": {
            "final": ("GPM_3IMERGM", "3B-MO"),
        },
    }

    def __init__(self, username, password, timeout=60, retries=3):
        """Create the authenticated, retrying HTTP session.

        Args:
            username, password: NASA Earthdata credentials (both required).
            timeout: Timeout in seconds passed to every request.
            retries: Retry budget used for total/connect/read/status retries.

        Raises:
            ValueError: If either credential is empty.
        """
        if not username or not password:
            raise ValueError("NASA Earthdata username and password are required.")

        self.timeout = timeout
        self.session = requests.Session()
        # NOTE(review): requests drops the Authorization header when a
        # redirect changes host; confirm the Earthdata login redirect still
        # authenticates with plain session-level basic auth.
        self.session.auth = HTTPBasicAuth(username, password)
        retry = Retry(
            total=retries,
            connect=retries,
            read=retries,
            status=retries,
            backoff_factor=0.5,
            status_forcelist=(429, 500, 502, 503, 504),
            allowed_methods=("GET",),
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount("https://", adapter)
        self.session.mount("http://", adapter)

    def _build_url(self, lat, lon, dt, version, run_type="early", freq="hhr", bbox=None):
        """Build the OTF subsetting URL for one granule.

        Args:
            lat, lon: Point of interest; used (with a 0.1 degree margin on
                each side) only when ``bbox`` is not given.
            dt: Timestamp identifying the granule.
            version: Product version string, e.g. 'V07B'.
            run_type: 'early', 'late' or 'final'.
            freq: 'hhr', 'daily' or 'monthly'.
            bbox: Optional ``(min_lat, min_lon, max_lat, max_lon)``.

        Raises:
            ValueError: Unsupported ``freq`` or ``run_type``.
        """
        if bbox:
            min_lat, min_lon, max_lat, max_lon = (float(edge) for edge in bbox)
        else:
            # Coerce like the bbox branch so numeric strings are accepted.
            lat, lon = float(lat), float(lon)
            min_lat, max_lat = lat - 0.1, lat + 0.1
            min_lon, max_lon = lon - 0.1, lon + 0.1
        # Clamp the box to valid geographic limits.
        min_lat, max_lat = max(-90.0, min_lat), min(90.0, max_lat)
        min_lon, max_lon = max(-180.0, min_lon), min(180.0, max_lon)
        bbox_param = "%2C".join(str(edge) for edge in (min_lat, min_lon, max_lat, max_lon))

        year = dt.strftime("%Y")
        month = dt.strftime("%m")
        date_str = dt.strftime("%Y%m%d")

        if freq == "hhr":
            # Imported lazily, and only on the path that needs it.
            from .config import get_time_string

            time_str = get_time_string(dt)
            ext = "HDF5"
            folder = f"{year}/{dt.strftime('%j')}"  # year / day-of-year
        elif freq == "daily":
            time_str = "S000000-E235959"
            ext = "nc4"
            folder = f"{year}/{month}"
        elif freq == "monthly":
            time_str = f"S000000-E235959.{month}"
            ext = "HDF5"
            date_str = dt.strftime("%Y%m01")  # Monthlies start on 1st
            folder = year
        else:
            raise ValueError(f"Unsupported frequency: {freq}")

        try:
            shortname, stem = self._PRODUCTS[freq][run_type]
        except (KeyError, TypeError):
            if freq == "monthly":
                raise ValueError("Monthly frequency usually only supports 'final' run_type.") from None
            raise ValueError("run_type must be 'early', 'late', or 'final'.") from None

        prefix = f"{stem}.MS.MRG.3IMERG.{date_str}-{time_str}"
        filename = f"/data/GPM_L3/{shortname}.07/{folder}/{prefix}.{version}.{ext}"
        filename_encoded = filename.replace("/", "%2F")
        label = f"{prefix}.{version}.{ext}.SUB.nc4"

        return (
            f"https://gpm1.gesdisc.eosdis.nasa.gov/daac-bin/OTF/HTTP_services.cgi?"
            f"FILENAME={filename_encoded}&SERVICE=L34RS_GPM&LABEL={label}&BBOX={bbox_param}"
            f"&VERSION=1.02&VARIABLES=precipitation&SHORTNAME={shortname}&DATASET_VERSION=07&FORMAT=nc4%2F"
        )

    def download_granule(self, lat, lon, dt, out_path, run_type="early", freq="hhr", bbox=None):
        """Download one granule to ``out_path``, trying V07C, then V07B, then V07A.

        A 404 moves on to the next version; any other non-200 status aborts.

        Returns:
            ``(True, version)`` with the version that was downloaded.

        Raises:
            ValueError: Unsupported ``freq`` or ``run_type``.
            DownloadError: Network failure, unexpected HTTP status, or no
                version of the granule available.
        """
        for version in self._VERSIONS:
            url = self._build_url(lat, lon, dt, version, run_type, freq, bbox=bbox)

            try:
                # The context manager closes the streamed response on every
                # path, including the 404 fall-through.
                with self.session.get(url, stream=True, timeout=self.timeout) as response:
                    status = response.status_code
                    if status == 200:
                        with open(out_path, 'wb') as f:
                            for chunk in response.iter_content(chunk_size=8192):
                                if chunk:
                                    f.write(chunk)
                        return True, version
                    if status != 404:
                        raise DownloadError(
                            f"Failed to download data. Status: {status}. Response: {response.text}"
                        )
            except requests.RequestException as e:
                # Covers connection failures and errors raised mid-stream.
                raise DownloadError(f"Network error while downloading {dt}: {e}") from e

        raise DownloadError(
            f"File not available (tried {', '.join(self._VERSIONS)}) for {dt} | Run: {run_type} | Freq: {freq}"
        )
imergpy/plotter.py ADDED
@@ -0,0 +1,49 @@
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import os
4
+ from .analyzer import _precip_column, _time_column
5
+
6
def plot_from_excel(excel_path, save_png=True):
    """Plot the precipitation time series stored in an IMERG Excel file.

    Args:
        excel_path (str): Path to the generated Excel file.
        save_png (bool): If True, save the plot as a PNG next to the Excel
            file (same name, ``.png`` extension); otherwise show it.

    The title names the requested point when the file has
    ``Requested_Lat`` / ``Requested_Lon`` columns, the region when it has a
    ``Region_Name`` column, and is left generic otherwise.

    Raises:
        Exception: Any failure, re-raised with a "Failed to plot time
            series" prefix and the original error attached as the cause.
    """
    try:
        df = pd.read_excel(excel_path)
        if df.empty:
            raise ValueError("Excel file contains no data rows.")

        time_col = _time_column(df)
        precip_col = _precip_column(df)
        df[time_col] = pd.to_datetime(df[time_col])
        df = df.sort_values(time_col)
        ylabel = precip_col.replace("Precipitation_", "").replace("_", " ")

        # Area downloads carry region columns instead of a requested point.
        if 'Requested_Lat' in df.columns and 'Requested_Lon' in df.columns:
            lat = df['Requested_Lat'].iloc[0]
            lon = df['Requested_Lon'].iloc[0]
            title = f'IMERG Precipitation\nLat: {lat}, Lon: {lon}'
        elif 'Region_Name' in df.columns:
            title = f"IMERG Precipitation\nRegion: {df['Region_Name'].iloc[0]}"
        else:
            title = 'IMERG Precipitation'

        plt.figure(figsize=(10, 6))
        try:
            plt.plot(df[time_col], df[precip_col], marker='o', linestyle='-', color='b', label='Precipitation')
            plt.title(title)
            plt.xlabel('Time (UTC)')
            plt.ylabel(f'Precipitation ({ylabel})')
            plt.grid(True, linestyle='--', alpha=0.7)
            plt.xticks(rotation=45)
            plt.tight_layout()

            if save_png:
                png_path = os.path.splitext(excel_path)[0] + '.png'
                plt.savefig(png_path, dpi=300)
                print(f"Plot saved successfully to: {png_path}")
            else:
                plt.show()
        finally:
            # Close the figure even when plotting or saving fails.
            plt.close()

    except Exception as e:
        raise Exception(f"Failed to plot time series: {str(e)}") from e
imergpy/processor.py ADDED
@@ -0,0 +1,157 @@
1
+ import xarray as xr
2
+ import pandas as pd
3
+ import calendar
4
+ import numpy as np
5
+ from datetime import timedelta
6
+ from dateutil.relativedelta import relativedelta
7
+ from matplotlib.path import Path
8
+
9
+
10
# Interpolation methods accepted by ``extract_precipitation``.
VALID_INTERPOLATION_METHODS = {
    "nearest",
    "linear",
    "cubic",
}
11
+
12
+
13
def _time_bounds(time_val, freq, current_dt):
    """Return ``(start, end, column_name, scale)`` for one granule.

    Args:
        time_val: Time value from the dataset; the first element is used
            when it is an array with at least one dimension.
        freq: 'hhr', 'daily' or 'monthly'.
        current_dt: Requested timestamp; only read for 'monthly', where its
            year and month determine the number of days in the month.

    Returns:
        The interval start, the interval end (start plus 30 minutes, one day
        or one month), the name of the precipitation column, and the factor
        callers multiply the dataset value by (0.5, 1.0 or 24 x days in the
        month).

    Raises:
        ValueError: If ``freq`` is not supported.
    """
    is_array = getattr(time_val, 'ndim', 0) > 0
    start = time_val[0] if is_array else time_val
    if hasattr(start, 'tolist'):
        # NumPy-style scalars are converted through pandas.
        start = pd.to_datetime(start)

    if freq == "hhr":
        end = start + timedelta(minutes=30)
        column, scale = "Precipitation_mm_per_half_hour", 0.5
    elif freq == "daily":
        end = start + timedelta(days=1)
        column, scale = "Precipitation_mm_per_day", 1.0
    elif freq == "monthly":
        days_in_month = calendar.monthrange(current_dt.year, current_dt.month)[1]
        end = start + relativedelta(months=1)
        column, scale = "Precipitation_mm_per_month", 24 * days_in_month
    else:
        raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")

    return start, end, column, scale
26
+
27
+
28
def _iter_polygons(geometry):
    """Return the polygons of a geometry mapping as a list of ring lists.

    A 'Polygon' yields a one-element list holding its coordinates, a
    'MultiPolygon' yields its coordinates as-is, and anything else
    (including an empty or missing geometry) yields an empty list.
    """
    if not geometry:
        return []

    kind = geometry.get("type")
    if kind == "Polygon":
        return [geometry["coordinates"]]
    if kind == "MultiPolygon":
        return geometry["coordinates"]
    return []
36
+
37
+
38
def _geometry_mask(lats, lons, geometry):
    """Return a boolean ``(len(lats), len(lons))`` mask of cells inside ``geometry``.

    Each (lon, lat) grid point is tested against every polygon: a point
    counts for a polygon when it lies inside the first ring and outside all
    remaining rings (holes). The result is the union over all polygons.
    """
    lon_grid, lat_grid = np.meshgrid(lons, lats)
    points = np.column_stack([lon_grid.ravel(), lat_grid.ravel()])
    inside_any = np.zeros(points.shape[0], dtype=bool)

    for rings in _iter_polygons(geometry):
        if not rings:
            continue
        inside = Path(rings[0]).contains_points(points)
        for hole in rings[1:]:
            inside = inside & ~Path(hole).contains_points(points)
        inside_any = inside_any | inside

    return inside_any.reshape(lat_grid.shape)
53
+
54
def extract_precipitation(nc_path, target_lat, target_lon, method="nearest", freq="hhr", current_dt=None):
    """Extract the precipitation value at one point from a downloaded granule.

    Args:
        nc_path: Path to the NetCDF file.
        target_lat, target_lon: Requested coordinates.
        method: 'nearest' selects the closest grid cell; 'linear' and 'cubic'
            interpolate to the requested coordinates.
        freq: 'hhr', 'daily' or 'monthly'; decides the result column name
            and the factor applied to the stored value.
        current_dt: Requested timestamp; required when ``freq='monthly'``.

    Returns:
        dict with the requested and actual coordinates, the interpolation
        method, ``Start_Time``, ``End_Time`` and one ``Precipitation_*`` value.

    Raises:
        ValueError: Invalid ``method`` / ``freq`` or missing ``current_dt``.
        Exception: Any failure while reading the file, re-raised with a
            "Failed to process NetCDF" prefix and the original error as cause.
    """
    if method not in VALID_INTERPOLATION_METHODS:
        raise ValueError("method must be 'nearest', 'linear', or 'cubic'.")
    if freq not in {"hhr", "daily", "monthly"}:
        raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")
    if freq == "monthly" and current_dt is None:
        raise ValueError("current_dt is required when freq='monthly'.")

    try:
        # The context manager closes the dataset on every exit path.
        with xr.open_dataset(nc_path) as ds:
            if method == "nearest":
                point_data = ds.sel(lat=target_lat, lon=target_lon, method="nearest")
                actual_lat = float(point_data['lat'].values.item())
                actual_lon = float(point_data['lon'].values.item())
            else:
                point_data = ds.interp(lat=target_lat, lon=target_lon, method=method)
                actual_lat = target_lat
                actual_lon = target_lon

            precip_value = point_data['precipitation'].values
            time_val = point_data['time'].values

            t_start, t_end, col_name, scale = _time_bounds(time_val, freq, current_dt)
            val = float(precip_value.item() if hasattr(precip_value, 'item') else precip_value) * scale
            return {
                "Requested_Lat": target_lat,
                "Requested_Lon": target_lon,
                "Actual_Lat": actual_lat,
                "Actual_Lon": actual_lon,
                "Interpolation": method,
                "Start_Time": t_start,
                "End_Time": t_end,
                col_name: val
            }
    except Exception as e:
        raise Exception(f"Failed to process NetCDF: {str(e)}") from e
99
+
100
+
101
def extract_area_average(nc_path, bbox, freq="hhr", current_dt=None, geometry=None, region_name=None, region_type="area"):
    """Average precipitation over a region of a downloaded granule.

    The plain (unweighted) mean is taken over every grid cell with a finite
    value. When ``geometry`` is given and at least one cell lies inside it,
    the mean is restricted to those cells; if no cell lies inside, the whole
    grid is used instead.

    Args:
        nc_path: Path to the NetCDF file.
        bbox: ``(min_lat, min_lon, max_lat, max_lon)``, copied into the result.
        freq: 'hhr', 'daily' or 'monthly'.
        current_dt: Requested timestamp; required when ``freq='monthly'``.
        geometry: Optional geometry mapping with 'type' and 'coordinates'.
        region_name: Label stored in ``Region_Name`` (defaults to ``region_type``).
        region_type: Label stored in ``Region_Type``.

    Returns:
        dict with the region labels, the bbox edges, the number of cells
        averaged, ``Start_Time``, ``End_Time`` and one ``Precipitation_*`` value.

    Raises:
        ValueError: Invalid ``freq`` or missing ``current_dt``.
        Exception: Any failure while processing the file, re-raised with a
            "Failed to process area NetCDF" prefix and the original error as
            cause.
    """
    if freq not in {"hhr", "daily", "monthly"}:
        raise ValueError("freq must be 'hhr', 'daily', or 'monthly'.")
    if freq == "monthly" and current_dt is None:
        raise ValueError("current_dt is required when freq='monthly'.")

    try:
        # The context manager closes the dataset on every exit path.
        with xr.open_dataset(nc_path) as ds:
            da = ds["precipitation"]
            if "time" in da.dims:
                da = da.isel(time=0)
            if "lat" in da.dims and "lon" in da.dims:
                da = da.transpose("lat", "lon")

            lats = ds["lat"].values
            lons = ds["lon"].values
            values = np.asarray(da.values, dtype=float)
            # Drop any leading extra dimensions by taking their first slice.
            while values.ndim > 2:
                values = values[0]
            expected_shape = (len(lats), len(lons))
            if values.shape != expected_shape:
                if values.T.shape == expected_shape:
                    values = values.T
                else:
                    raise ValueError(
                        f"Unexpected precipitation grid shape {values.shape}; expected {expected_shape}."
                    )

            mask = np.isfinite(values)
            if geometry:
                geom_mask = _geometry_mask(lats, lons, geometry)
                # An empty geometry mask is ignored rather than treated as an error.
                if geom_mask.any():
                    mask &= geom_mask

            if not mask.any():
                raise ValueError("No IMERG grid cells found inside the selected region.")

            mean_rate = float(np.nanmean(np.where(mask, values, np.nan)))
            t_start, t_end, col_name, scale = _time_bounds(ds["time"].values, freq, current_dt)
            min_lat, min_lon, max_lat, max_lon = bbox

            return {
                "Region_Type": region_type,
                "Region_Name": region_name or region_type,
                "Min_Lat": min_lat,
                "Min_Lon": min_lon,
                "Max_Lat": max_lat,
                "Max_Lon": max_lon,
                "Grid_Cells_Averaged": int(mask.sum()),
                "Start_Time": t_start,
                "End_Time": t_end,
                col_name: mean_rate * scale,
            }
    except Exception as e:
        raise Exception(f"Failed to process area NetCDF: {str(e)}") from e