atmofetch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
atmofetch/__init__.py ADDED
@@ -0,0 +1,37 @@
1
+ """AtmoFetch - Download meteorological data from publicly available repositories.
2
+
3
+ Data sources:
4
+ - OGIMET (ogimet.com) — SYNOP station data (hourly & daily)
5
+ - University of Wyoming — atmospheric vertical profiling (sounding) data
6
+ - NOAA — Integrated Surface Hourly (ISH) and Mauna Loa CO2 data
7
+ """
8
+
9
+ from atmofetch.noaa import meteo_noaa_hourly, meteo_noaa_co2, nearest_stations_noaa
10
+ from atmofetch.ogimet import (
11
+ meteo_ogimet,
12
+ ogimet_daily,
13
+ ogimet_hourly,
14
+ stations_ogimet,
15
+ nearest_stations_ogimet,
16
+ )
17
+ from atmofetch.wyoming import sounding_wyoming
18
+ from atmofetch._utils.distance import spheroid_dist
19
+
20
+ __version__ = "0.1.0"
21
+
22
+ __all__ = [
23
+ # NOAA
24
+ "meteo_noaa_hourly",
25
+ "meteo_noaa_co2",
26
+ "nearest_stations_noaa",
27
+ # OGIMET
28
+ "meteo_ogimet",
29
+ "ogimet_daily",
30
+ "ogimet_hourly",
31
+ "stations_ogimet",
32
+ "nearest_stations_ogimet",
33
+ # Wyoming
34
+ "sounding_wyoming",
35
+ # Utilities
36
+ "spheroid_dist",
37
+ ]
@@ -0,0 +1,11 @@
1
+ from atmofetch._utils.distance import spheroid_dist
2
+ from atmofetch._utils.network import download, check_internet
3
+ from atmofetch._utils.coordinates import get_coord_from_string, precip_split
4
+
5
+ __all__ = [
6
+ "spheroid_dist",
7
+ "download",
8
+ "check_internet",
9
+ "get_coord_from_string",
10
+ "precip_split",
11
+ ]
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+
8
+
9
def get_coord_from_string(txt: str, pattern: str = "Longitude") -> float | None:
    """Parse a degrees-minutes(-seconds) coordinate out of Ogimet metadata.

    Parameters
    ----------
    txt : raw metadata string (e.g. ``"Latitude: 52-25N Longitude: 016-50E ..."``)
    pattern : label that precedes the coordinate, ``"Longitude"`` or
        ``"Latitude"``; it is interpolated into the regular expression as-is.

    Returns
    -------
    Signed decimal degrees (negative for the ``W`` and ``S`` hemispheres), or
    ``None`` when no ``"<pattern>: DD-MM[-SS]H"`` fragment is present.
    """
    found = re.search(rf"{pattern}:\s*([\d]+)-([\d]+)(?:-([\d]+))?\s*([NSEW])", txt)
    if found is None:
        return None

    deg_txt, min_txt, sec_txt, hemi = found.groups()
    whole_degrees = int(deg_txt)
    # ``x * 5 / 3 / 100`` equals ``x / 60``: minutes as a fraction of a degree.
    minute_fraction = (int(min_txt) * 5 / 3) / 100
    # The seconds group is optional and counts as zero when it is missing.
    second_fraction = (int(sec_txt or "0") * 5 / 3) / 100 / 60
    decimal = whole_degrees + minute_fraction + second_fraction
    return decimal * -1 if hemi in ("W", "S") else decimal
26
+
27
+
28
def precip_split(precip: pd.Series, pattern: str = "/12") -> pd.Series:
    """Pull the precipitation amount for one accumulation window out of Ogimet strings.

    Parameters
    ----------
    precip : Series of strings like ``"1.2/6h0.0/12h3.4/24h"``
    pattern : window marker to look for: ``"/6"``, ``"/12"``, or ``"/24"``

    Returns
    -------
    Series produced by applying the extraction to every element; elements
    that are missing, lack the marker, or hold a non-numeric amount map to
    ``None``.
    """

    def _extract(val: str | None) -> float | None:
        if val is None:
            return None
        if isinstance(val, float) and np.isnan(val):
            return None

        # Entries are "<amount><marker>" chunks terminated by "h"; only the
        # first chunk containing the marker is considered.
        chunk = next((piece for piece in str(val).split("h") if pattern in piece), None)
        if chunk is None:
            return None
        try:
            return float(chunk.replace(pattern, ""))
        except ValueError:
            return None

    return precip.apply(_extract)
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+
5
+
6
def spheroid_dist(p1: tuple[float, float], p2: tuple[float, float]) -> float:
    """Compute the distance between two lon/lat points with Vincenty's formula.

    A single mean Earth radius is used, so the Earth is treated as a sphere.

    Parameters
    ----------
    p1 : (lon, lat) in decimal degrees
    p2 : (lon, lat) in decimal degrees

    Returns
    -------
    Distance in kilometres.
    """
    mean_radius_m = 6_371_009  # mean earth radius in metres
    lon_a, lat_a, lon_b, lat_b = [deg * math.pi / 180 for deg in (*p1, *p2)]
    delta_lon = lon_b - lon_a

    # atan2(sqrt(numerator), denominator) gives the central angle in radians.
    east_term = math.cos(lat_b) * math.sin(delta_lon)
    north_term = (
        math.cos(lat_a) * math.sin(lat_b)
        - math.sin(lat_a) * math.cos(lat_b) * math.cos(delta_lon)
    )
    numerator = east_term ** 2 + north_term ** 2
    denominator = (
        math.sin(lat_a) * math.sin(lat_b)
        + math.cos(lat_a) * math.cos(lat_b) * math.cos(delta_lon)
    )
    central_angle = math.atan2(math.sqrt(numerator), denominator)
    return central_angle * mean_radius_m / 1000
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import httpx
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ _TIMEOUT = 30.0
11
+
12
# Browser-like request headers that ``fetch_ogimet`` sends with every request.
# NOTE(review): the cookie values are hard-coded; presumably captured from a
# browser session -- confirm they remain valid over time.
_OGIMET_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:143.0) "
        "Gecko/20100101 Firefox/143.0"
    ),
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "pl,en-US;q=0.7,en;q=0.3",
    "Referer": "https://ogimet.com/resynops.phtml.en",
    "Cookie": "cookieconsent_status=dismiss; ogimet_serverid=huracan|aNaPt|aNaPj",
}
21
+
22
+
23
def check_internet() -> bool:
    """Report whether a HEAD request to ``https://www.google.com`` completes.

    Returns
    -------
    ``True`` when the request finishes without raising ``httpx.HTTPError``
    (the response status is not inspected), ``False`` otherwise.
    """
    try:
        httpx.head("https://www.google.com", timeout=5)
    except httpx.HTTPError:
        return False
    else:
        return True
29
+
30
+
31
def download(url: str, dest: Path | str | None = None, *, timeout: float = _TIMEOUT) -> bytes:
    """Fetch *url* and return the response body as bytes.

    Parameters
    ----------
    url : address to request; redirects are followed.
    dest : optional file path; when given, the body is also written there.
    timeout : request timeout handed to ``httpx.get``.

    Returns
    -------
    The raw response content.

    Raises
    ------
    httpx.HTTPStatusError
        Raised by ``raise_for_status`` for an error status code.
    """
    logger.info("Downloading %s", url)
    response = httpx.get(url, timeout=timeout, follow_redirects=True)
    response.raise_for_status()

    payload = response.content
    if dest is None:
        return payload
    Path(dest).write_bytes(payload)
    return payload
38
+
39
+
40
def fetch_text(
    url: str,
    *,
    headers: dict[str, str] | None = None,
    timeout: float = _TIMEOUT,
) -> str:
    """Fetch *url* and return the decoded response text.

    Parameters
    ----------
    url : address to request; redirects are followed.
    headers : optional request headers forwarded to ``httpx.get``.
    timeout : request timeout handed to ``httpx.get``.

    Raises
    ------
    httpx.HTTPStatusError
        Raised by ``raise_for_status`` for an error status code.
    """
    logger.info("Fetching %s", url)
    request_options = {"headers": headers, "timeout": timeout, "follow_redirects": True}
    response = httpx.get(url, **request_options)
    response.raise_for_status()
    return response.text
50
+
51
+
52
def fetch_ogimet(url: str, *, timeout: float = _TIMEOUT) -> str:
    """Fetch *url* as text via ``fetch_text`` using the Ogimet request headers."""
    page_text = fetch_text(url, timeout=timeout, headers=_OGIMET_HEADERS)
    return page_text
@@ -0,0 +1,5 @@
1
+ from atmofetch.noaa.hourly import meteo_noaa_hourly
2
+ from atmofetch.noaa.co2 import meteo_noaa_co2
3
+ from atmofetch.noaa.stations import nearest_stations_noaa
4
+
5
+ __all__ = ["meteo_noaa_hourly", "meteo_noaa_co2", "nearest_stations_noaa"]
atmofetch/noaa/co2.py ADDED
@@ -0,0 +1,34 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+
6
+ import pandas as pd
7
+
8
+ from atmofetch._utils.network import fetch_text
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+ _CO2_URL = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.txt"
13
+
14
+
15
def meteo_noaa_co2() -> pd.DataFrame:
    """Fetch the monthly Mauna Loa CO2 record published by NOAA.

    The text file is downloaded, lines starting with ``#`` are dropped, and
    the remaining whitespace-separated rows are parsed. The strings
    ``-9.99`` and ``-0.99`` are read as missing values.

    Returns
    -------
    DataFrame with columns: yy, mm, yy_d, co2_avg, co2_interp, co2_seas, ndays, st_dev_days.
    """
    # NOTE(review): these names assume the upstream file has exactly this
    # eight-column layout -- confirm against the current file header.
    column_names = [
        "yy",
        "mm",
        "yy_d",
        "co2_avg",
        "co2_interp",
        "co2_seas",
        "ndays",
        "st_dev_days",
    ]

    raw_text = fetch_text(_CO2_URL)
    data_rows = "\n".join(row for row in raw_text.splitlines() if not row.startswith("#"))

    return pd.read_csv(
        io.StringIO(data_rows),
        sep=r"\s+",
        header=None,
        names=column_names,
        na_values=["-9.99", "-0.99"],
    )
@@ -0,0 +1,157 @@
1
+ from __future__ import annotations
2
+
3
+ import gzip
4
+ import io
5
+ import logging
6
+
7
+ import pandas as pd
8
+
9
+ from atmofetch._utils.network import download
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ _BASE_URL = "https://www.ncei.noaa.gov/pub/data/noaa/"
14
+
15
# Fixed-width field sizes handed to ``pd.read_fwf`` for each ISH record, in
# column order (34 fields, 105 characters in total). ``meteo_noaa_hourly``
# addresses the parsed fields by zero-based position; the positions it reads
# are labelled below with the column names it assigns to them.
_COL_WIDTHS = [
    4, 6, 5,  # 0-2: not read
    4, 2, 2, 2,  # 3-6: year, month, day, hour
    2, 1,  # 7-8: not read
    6, 7,  # 9-10: lat, lon
    5,  # 11: record type tag (filtered on "FM-12")
    5,  # 12: alt
    5, 4,  # 13-14: not read
    3, 1, 1,  # 15: wd; 16-17: not read
    4, 1,  # 18: ws; 19: not read
    5, 1, 1, 1,  # 20-23: not read
    6, 1, 1, 1,  # 24: visibility; 25-27: not read
    5, 1,  # 28: t2m; 29: not read
    5, 1,  # 30: dpt2m; 31: not read
    5, 1,  # 32: slp; 33: not read
]
51
+
52
+
53
def _parse_ish_text(text: str, fm12: bool) -> pd.DataFrame:
    """Parse one decompressed ISH year file into a unit-scaled DataFrame."""
    raw = pd.read_fwf(io.StringIO(text), widths=_COL_WIDTHS, header=None)

    if fm12:
        raw = raw[raw.iloc[:, 11] == "FM-12"]

    # Explicit copy: columns are added and rewritten below, and doing that on
    # a filtered/sliced frame is a chained-assignment hazard in pandas.
    df = raw.iloc[:, [3, 4, 5, 6, 9, 10, 12, 15, 18, 24, 28, 30, 32]].copy()
    df.columns = [
        "year",
        "month",
        "day",
        "hour",
        "lat",
        "lon",
        "alt",
        "wd",
        "ws",
        "visibility",
        "t2m",
        "dpt2m",
        "slp",
    ]

    # Timestamps are built at whole-hour resolution (minute and second = 0).
    df["date"] = pd.to_datetime(
        df[["year", "month", "day", "hour"]].assign(minute=0, second=0),
        utc=True,
    )

    # Values that stand for "missing" in the raw integer fields.
    sentinels = {
        "t2m": 9999,
        "dpt2m": 9999,
        "ws": 9999,
        "wd": 999,
        "slp": 99999,
        "visibility": 999999,
    }
    for col, sentinel in sentinels.items():
        # mask() inserts NaN and keeps the column numeric; replacing with
        # pd.NA would turn an integer column into object dtype.
        df[col] = df[col].mask(df[col] == sentinel)

    # Raw fields are stored as scaled integers; convert them to real units.
    scale = {"lon": 1000, "lat": 1000, "ws": 10, "t2m": 10, "dpt2m": 10, "slp": 10}
    for col, divisor in scale.items():
        df[col] = df[col] / divisor

    return df


def meteo_noaa_hourly(
    station: str,
    year: int | list[int] = 2019,
    fm12: bool = True,
) -> pd.DataFrame:
    """Download hourly NOAA Integrated Surface Hourly (ISH) data.

    Each requested year is fetched as ``<base>/<year>/<station>-<year>.gz``.
    Years that fail to download, or whose file is shorter than 100 bytes,
    are logged and skipped.

    Parameters
    ----------
    station : Station ID string (e.g. ``"037720-99999"``).
    year : Year or list of years.
    fm12 : If True, keep only FM-12 (SYNOP) records.

    Returns
    -------
    DataFrame with columns: date, year, month, day, hour, lon, lat, alt,
    t2m, dpt2m, ws, wd, slp, visibility, sorted by ``date``. Missing
    measurements are NaN. An empty DataFrame (no columns) is returned when
    no year could be loaded.
    """
    years = [year] if isinstance(year, int) else year

    frames: list[pd.DataFrame] = []
    for yr in years:
        url = f"{_BASE_URL}{yr}/{station}-{yr}.gz"
        try:
            raw = download(url)
        except Exception as exc:
            # Deliberately best-effort per year, but report why it failed.
            logger.warning("Failed to download %s (%s)", url, exc)
            continue

        if len(raw) < 100:
            logger.warning("File too small for %s-%s, skipping", station, yr)
            continue

        text = gzip.decompress(raw).decode("latin-1")
        frames.append(_parse_ish_text(text, fm12))

    if not frames:
        return pd.DataFrame()

    result = pd.concat(frames, ignore_index=True)
    result = result[
        [
            "date",
            "year",
            "month",
            "day",
            "hour",
            "lon",
            "lat",
            "alt",
            "t2m",
            "dpt2m",
            "ws",
            "wd",
            "slp",
            "visibility",
        ]
    ]
    return result.sort_values("date").reset_index(drop=True)
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ import io
4
+ import logging
5
+ from datetime import date, datetime
6
+
7
+ import pandas as pd
8
+
9
+ from atmofetch._utils.network import fetch_text
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ _COUNTRY_LIST_URL = "https://www.ncei.noaa.gov/pub/data/noaa/country-list.txt"
14
+ _ISD_HISTORY_URL = "https://www.ncei.noaa.gov/pub/data/noaa/isd-history.csv"
15
+
16
+
17
def nearest_stations_noaa(
    country: str,
    date_query: date | None = None,
    point: tuple[float, float] | None = None,
    no_of_stations: int = 10,
) -> pd.DataFrame:
    """Find nearest NOAA ISH stations for a given country and location.

    Parameters
    ----------
    country : Country name (e.g. ``"UNITED KINGDOM"``); it is upper-cased
        before being matched against the NOAA country list.
    date_query : Day for which station availability is checked. Defaults to
        today. A ``datetime`` is reduced to its calendar date.
    point : ``(longitude, latitude)`` reference point. If *None*, the centroid
        of all matching stations is used.
    no_of_stations : How many nearest stations to return.

    Returns
    -------
    DataFrame sorted by distance with station metadata plus ``Begin_date``,
    ``End_date`` and ``distance`` columns. ``distance`` is the planar
    distance in degrees multiplied by 112.196672; longitude differences are
    not corrected for latitude, so it is only a rough kilometre figure.

    Raises
    ------
    ValueError
        If no station matches the country, or none covers ``date_query``.
    """
    if date_query is None:
        date_query = date.today()
    elif isinstance(date_query, datetime):
        # datetime subclasses date but cannot be ordered against plain date
        # objects (TypeError), so compare on the calendar day only.
        date_query = date_query.date()

    country = country.upper()

    # --- country list ---
    country_text = fetch_text(_COUNTRY_LIST_URL)
    country_rows: list[dict[str, str]] = []
    for line in country_text.strip().splitlines()[1:]:
        ctry = line[:2].strip()
        name = line[2:].strip()
        if ctry and name:
            country_rows.append({"CTRY": ctry, "countries": name})
    # Explicit columns keep the merge key present even if no row was parsed,
    # so that case ends in the ValueError below rather than a KeyError.
    countries_df = pd.DataFrame(country_rows, columns=["CTRY", "countries"])

    # --- station history ---
    hist_text = fetch_text(_ISD_HISTORY_URL)
    stations_df = pd.read_csv(io.StringIO(hist_text))

    merged = stations_df.merge(countries_df, on="CTRY")

    # Narrow to the requested country first so dates are parsed only for the
    # stations that can actually be returned.
    result = merged[merged["countries"] == country].copy()
    if result.empty:
        raise ValueError(
            f"No stations found for country '{country}'. "
            "Check names at https://www.ncei.noaa.gov/pub/data/noaa/country-list.txt"
        )

    def _parse_date(val: object) -> date | None:
        """Convert a ``YYYYMMDD`` number to a date; ``None`` if unusable."""
        s = str(int(val)) if pd.notna(val) else ""  # type: ignore[call-overload]
        if len(s) < 8:
            return None
        try:
            return datetime.strptime(s, "%Y%m%d").date()
        except ValueError:
            return None

    result["Begin_date"] = result["BEGIN"].apply(_parse_date)
    result["End_date"] = result["END"].apply(_parse_date)

    has_period = result["Begin_date"].notna() & result["End_date"].notna()
    result = result[has_period]
    result = result[(result["Begin_date"] <= date_query) & (result["End_date"] >= date_query)]
    if result.empty:
        raise ValueError(f"No stations with data on {date_query} for country '{country}'.")

    if point is None:
        point = (
            float(result["LON"].mean()),
            float(result["LAT"].mean()),
        )

    # euclidean approximation scaled to ~km
    result = result.copy()
    result["distance"] = (
        (result["LON"] - point[0]) ** 2 + (result["LAT"] - point[1]) ** 2
    ) ** 0.5 * 112.196672
    result = result.sort_values("distance").head(no_of_stations).reset_index(drop=True)
    return result
@@ -0,0 +1,12 @@
1
+ from atmofetch.ogimet.hourly import ogimet_hourly
2
+ from atmofetch.ogimet.daily import ogimet_daily
3
+ from atmofetch.ogimet.dispatcher import meteo_ogimet
4
+ from atmofetch.ogimet.stations import stations_ogimet, nearest_stations_ogimet
5
+
6
+ __all__ = [
7
+ "ogimet_hourly",
8
+ "ogimet_daily",
9
+ "meteo_ogimet",
10
+ "stations_ogimet",
11
+ "nearest_stations_ogimet",
12
+ ]