PyPI - atmofetch - Versions diffs - 0.1.0__py3-none-any.whl - Mend

atmofetch 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

atmofetch/__init__.py +37 -0
atmofetch/_utils/__init__.py +11 -0
atmofetch/_utils/coordinates.py +50 -0
atmofetch/_utils/distance.py +27 -0
atmofetch/_utils/network.py +53 -0
atmofetch/noaa/__init__.py +5 -0
atmofetch/noaa/co2.py +34 -0
atmofetch/noaa/hourly.py +157 -0
atmofetch/noaa/stations.py +95 -0
atmofetch/ogimet/__init__.py +12 -0
atmofetch/ogimet/daily.py +249 -0
atmofetch/ogimet/dispatcher.py +42 -0
atmofetch/ogimet/hourly.py +222 -0
atmofetch/ogimet/stations.py +175 -0
atmofetch/wyoming/__init__.py +3 -0
atmofetch/wyoming/sounding.py +145 -0
atmofetch-0.1.0.dist-info/METADATA +131 -0
atmofetch-0.1.0.dist-info/RECORD +20 -0
atmofetch-0.1.0.dist-info/WHEEL +4 -0
atmofetch-0.1.0.dist-info/licenses/LICENSE +21 -0

atmofetch/__init__.py ADDED Viewed

@@ -0,0 +1,37 @@
+"""AtmoFetch - Download meteorological data from publicly available repositories.
+Data sources:
+- OGIMET (ogimet.com) — SYNOP station data (hourly & daily)
+- University of Wyoming — atmospheric vertical profiling (sounding) data
+- NOAA — Integrated Surface Hourly (ISH) and Mauna Loa CO2 data
+"""
+from atmofetch.noaa import meteo_noaa_hourly, meteo_noaa_co2, nearest_stations_noaa
+from atmofetch.ogimet import (
+    meteo_ogimet,
+    ogimet_daily,
+    ogimet_hourly,
+    stations_ogimet,
+    nearest_stations_ogimet,
+)
+from atmofetch.wyoming import sounding_wyoming
+from atmofetch._utils.distance import spheroid_dist
+# Version string of the package.
+__version__ = "0.1.0"
+# Public API of the top-level package: every name listed here is imported
+# above from one of the source-specific subpackages or from _utils.
+__all__ = [
+    # NOAA
+    "meteo_noaa_hourly",
+    "meteo_noaa_co2",
+    "nearest_stations_noaa",
+    # OGIMET
+    "meteo_ogimet",
+    "ogimet_daily",
+    "ogimet_hourly",
+    "stations_ogimet",
+    "nearest_stations_ogimet",
+    # Wyoming
+    "sounding_wyoming",
+    # Utilities
+    "spheroid_dist",
+]

atmofetch/_utils/__init__.py ADDED Viewed

@@ -0,0 +1,11 @@
+from atmofetch._utils.distance import spheroid_dist
+from atmofetch._utils.network import download, check_internet
+from atmofetch._utils.coordinates import get_coord_from_string, precip_split
+# Helper names re-exported from the distance, network and coordinates modules.
+__all__ = [
+    "spheroid_dist",
+    "download",
+    "check_internet",
+    "get_coord_from_string",
+    "precip_split",
+]

atmofetch/_utils/coordinates.py ADDED Viewed

@@ -0,0 +1,50 @@
+from __future__ import annotations
+import re
+import numpy as np
+import pandas as pd
+def get_coord_from_string(txt: str, pattern: str = "Longitude") -> float | None:
+    """Parse one coordinate out of an Ogimet metadata string as decimal degrees.
+    Parameters
+    ----------
+    txt : metadata text (e.g. ``"Latitude: 52-25N  Longitude: 016-50E ..."``)
+    pattern : label to search for, ``"Longitude"`` or ``"Latitude"``
+    Returns
+    -------
+    Signed decimal degrees (negative for the W and S hemispheres), or
+    ``None`` when the label is not present in *txt*.
+    """
+    found = re.search(rf"{pattern}:\s*([\d]+)-([\d]+)(?:-([\d]+))?\s*([NSEW])", txt)
+    if found is None:
+        return None
+    whole = int(found.group(1))
+    mins = int(found.group(2))
+    # The seconds group is optional in the pattern; a missing group counts as zero.
+    secs = int(found.group(3) or "0")
+    # ``x * 5 / 3 / 100`` is ``x / 60`` written out: minutes (and, after a
+    # further ``/ 60``, seconds) become fractions of a degree.
+    degrees = whole + (mins * 5 / 3) / 100 + (secs * 5 / 3) / 100 / 60
+    return -degrees if found.group(4) in ("W", "S") else degrees
+def precip_split(precip: pd.Series, pattern: str = "/12") -> pd.Series:
+    """Pull the precipitation amount for one accumulation window out of Ogimet strings.
+    Parameters
+    ----------
+    precip : Series of strings like ``"1.2/6h0.0/12h3.4/24h"``
+    pattern : window marker, ``"/6"``, ``"/12"``, or ``"/24"``
+    Returns
+    -------
+    Series holding the numeric amount for the requested window; entries that
+    are missing, lack the window, or are not numeric yield no value.
+    """
+    def _amount(cell: str | None) -> float | None:
+        if cell is None or (isinstance(cell, float) and np.isnan(cell)):
+            return None
+        # Each "h"-terminated chunk looks like "<amount><pattern>"; take the
+        # first chunk that mentions the requested window.
+        chunk = next((piece for piece in str(cell).split("h") if pattern in piece), None)
+        if chunk is None:
+            return None
+        try:
+            return float(chunk.replace(pattern, ""))
+        except ValueError:
+            return None
+    return precip.apply(_amount)

atmofetch/_utils/distance.py ADDED Viewed

@@ -0,0 +1,27 @@
+from __future__ import annotations
+import math
+def spheroid_dist(p1: tuple[float, float], p2: tuple[float, float]) -> float:
+    """Distance between two points using Vincenty's formula with a single mean Earth radius.
+    Parameters
+    ----------
+    p1 : (lon, lat) in decimal degrees
+    p2 : (lon, lat) in decimal degrees
+    Returns
+    -------
+    Distance in kilometres.
+    """
+    radius_m = 6_371_009  # mean earth radius in metres
+    lon_a, lat_a, lon_b, lat_b = [deg * math.pi / 180 for deg in (*p1, *p2)]
+    d_lon = lon_b - lon_a
+    # Numerator and denominator of the atan2 form of the central angle.
+    east_term = math.cos(lat_b) * math.sin(d_lon)
+    north_term = (
+        math.cos(lat_a) * math.sin(lat_b) - math.sin(lat_a) * math.cos(lat_b) * math.cos(d_lon)
+    )
+    cos_term = math.sin(lat_a) * math.sin(lat_b) + math.cos(lat_a) * math.cos(lat_b) * math.cos(d_lon)
+    central_angle = math.atan2(math.sqrt(east_term ** 2 + north_term ** 2), cos_term)
+    return central_angle * radius_m / 1000

atmofetch/_utils/network.py ADDED Viewed

@@ -0,0 +1,53 @@
+from __future__ import annotations
+import logging
+from pathlib import Path
+import httpx
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Default ``timeout`` handed to httpx by download() and fetch_text()
+# (presumably seconds — confirm against the httpx documentation).
+_TIMEOUT = 30.0
+# Headers sent by fetch_ogimet(); the User-Agent imitates desktop Firefox.
+# NOTE(review): the Cookie value is hard-coded — confirm it does not expire
+# or need refreshing.
+_OGIMET_HEADERS = {
+    "User-Agent": (
+        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:143.0) Gecko/20100101 Firefox/143.0"
+    ),
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+    "Accept-Language": "pl,en-US;q=0.7,en;q=0.3",
+    "Referer": "https://ogimet.com/resynops.phtml.en",
+    "Cookie": "cookieconsent_status=dismiss; ogimet_serverid=huracan|aNaPt|aNaPj",
+}
+def check_internet() -> bool:
+    """Report whether a HEAD request to https://www.google.com goes through.
+    Returns
+    -------
+    ``True`` when the request completes (the status code is not inspected),
+    ``False`` when httpx raises an ``HTTPError``.
+    """
+    try:
+        httpx.head("https://www.google.com", timeout=5)
+    except httpx.HTTPError:
+        return False
+    else:
+        return True
+def download(url: str, dest: Path | str | None = None, *, timeout: float = _TIMEOUT) -> bytes:
+    """Fetch *url* with a GET request and return the response body as bytes.
+    Parameters
+    ----------
+    url : address to fetch; redirects are followed.
+    dest : optional path; when given, the body is also written to this file.
+    timeout : timeout passed to httpx.
+    Raises
+    ------
+    httpx.HTTPStatusError when the response has an error status.
+    """
+    logger.info("Downloading %s", url)
+    response = httpx.get(url, timeout=timeout, follow_redirects=True)
+    response.raise_for_status()
+    payload = response.content
+    if dest is not None:
+        Path(dest).write_bytes(payload)
+    return payload
+def fetch_text(
+    url: str,
+    *,
+    headers: dict[str, str] | None = None,
+    timeout: float = _TIMEOUT,
+) -> str:
+    """Fetch *url* with a GET request and return the decoded response text.
+    Parameters
+    ----------
+    url : address to fetch; redirects are followed.
+    headers : optional request headers.
+    timeout : timeout passed to httpx.
+    Raises
+    ------
+    httpx.HTTPStatusError when the response has an error status.
+    """
+    logger.info("Fetching %s", url)
+    request_options = {"headers": headers, "timeout": timeout, "follow_redirects": True}
+    response = httpx.get(url, **request_options)
+    response.raise_for_status()
+    return response.text
+def fetch_ogimet(url: str, *, timeout: float = _TIMEOUT) -> str:
+    """Fetch *url* as text via fetch_text(), sending the ``_OGIMET_HEADERS`` request headers."""
+    return fetch_text(url, headers=_OGIMET_HEADERS, timeout=timeout)

atmofetch/noaa/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+from atmofetch.noaa.hourly import meteo_noaa_hourly
+from atmofetch.noaa.co2 import meteo_noaa_co2
+from atmofetch.noaa.stations import nearest_stations_noaa
+# Public names of the noaa subpackage, imported above from its modules.
+__all__ = ["meteo_noaa_hourly", "meteo_noaa_co2", "nearest_stations_noaa"]

atmofetch/noaa/co2.py ADDED Viewed

@@ -0,0 +1,34 @@
+from __future__ import annotations
+import io
+import logging
+import pandas as pd
+from atmofetch._utils.network import fetch_text
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Text file fetched by meteo_noaa_co2().
+_CO2_URL = "https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_mm_mlo.txt"
+def meteo_noaa_co2() -> pd.DataFrame:
+    """Fetch the monthly Mauna Loa Observatory CO2 record published by NOAA.
+    Returns
+    -------
+    DataFrame with columns: yy, mm, yy_d, co2_avg, co2_interp, co2_seas, ndays, st_dev_days.
+    The values ``-9.99`` and ``-0.99`` are read as missing.
+    """
+    column_names = [
+        "yy",
+        "mm",
+        "yy_d",
+        "co2_avg",
+        "co2_interp",
+        "co2_seas",
+        "ndays",
+        "st_dev_days",
+    ]
+    raw_text = fetch_text(_CO2_URL)
+    # Lines starting with "#" are dropped; the rest is a whitespace-separated table.
+    data_lines = (row for row in raw_text.splitlines() if not row.startswith("#"))
+    table = io.StringIO("\n".join(data_lines))
+    return pd.read_csv(
+        table,
+        sep=r"\s+",
+        header=None,
+        names=column_names,
+        na_values=["-9.99", "-0.99"],
+    )

atmofetch/noaa/hourly.py ADDED Viewed

@@ -0,0 +1,157 @@
+from __future__ import annotations
+import gzip
+import io
+import logging
+import pandas as pd
+from atmofetch._utils.network import download
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Root of the per-year station archives; meteo_noaa_hourly() appends
+# "<year>/<station>-<year>.gz".
+_BASE_URL = "https://www.ncei.noaa.gov/pub/data/noaa/"
+# Fixed-width field sizes passed to pandas.read_fwf, one entry per column.
+# The trailing comments give the zero-based column index for the fields that
+# meteo_noaa_hourly() actually uses, with the name it assigns to each.
+_COL_WIDTHS = [
+    4,
+    6,
+    5,
+    4,  # 3: year
+    2,  # 4: month
+    2,  # 5: day
+    2,  # 6: hour
+    2,
+    1,
+    6,  # 9: lat
+    7,  # 10: lon
+    5,  # 11: field compared against "FM-12" when fm12=True
+    5,  # 12: alt
+    5,
+    4,
+    3,  # 15: wd
+    1,
+    1,
+    4,  # 18: ws
+    1,
+    5,
+    1,
+    1,
+    1,
+    6,  # 24: visibility
+    1,
+    1,
+    1,
+    5,  # 28: t2m
+    1,
+    5,  # 30: dpt2m
+    1,
+    5,  # 32: slp
+    1,
+]
+def meteo_noaa_hourly(
+    station: str,
+    year: int | list[int] = 2019,
+    fm12: bool = True,
+) -> pd.DataFrame:
+    """Download hourly NOAA Integrated Surface Hourly (ISH) data.
+    Parameters
+    ----------
+    station : Station ID string (e.g. ``"037720-99999"``).
+    year : Year or list of years.
+    fm12 : If True, keep only FM-12 (SYNOP) records.
+    Returns
+    -------
+    DataFrame with columns: date, year, month, day, hour, lon, lat, alt,
+    t2m, dpt2m, ws, wd, slp, visibility, sorted by date.  Missing-value
+    sentinels in the measurement columns are returned as NaN (float columns).
+    Years that cannot be downloaded, or whose file is shorter than 100 bytes,
+    are skipped with a warning; if nothing is left the frame is empty but
+    still has the columns listed above.
+    """
+    output_columns = [
+        "date",
+        "year",
+        "month",
+        "day",
+        "hour",
+        "lon",
+        "lat",
+        "alt",
+        "t2m",
+        "dpt2m",
+        "ws",
+        "wd",
+        "slp",
+        "visibility",
+    ]
+    # Zero-based fixed-width column index -> output name.
+    selected = {
+        3: "year",
+        4: "month",
+        5: "day",
+        6: "hour",
+        9: "lat",
+        10: "lon",
+        12: "alt",
+        15: "wd",
+        18: "ws",
+        24: "visibility",
+        28: "t2m",
+        30: "dpt2m",
+        32: "slp",
+    }
+    # Raw value that stands for "missing" in each measurement column.
+    na_map = {
+        "t2m": 9999,
+        "dpt2m": 9999,
+        "ws": 9999,
+        "wd": 999,
+        "slp": 99999,
+        "visibility": 999999,
+    }
+    # Divisors that turn the stored integers into the returned values.
+    scale = {"lon": 1000, "lat": 1000, "ws": 10, "t2m": 10, "dpt2m": 10, "slp": 10}
+    years = [year] if isinstance(year, int) else year
+    frames: list[pd.DataFrame] = []
+    for yr in years:
+        url = f"{_BASE_URL}{yr}/{station}-{yr}.gz"
+        try:
+            raw = download(url)
+        except Exception:
+            # Best-effort: one unavailable year must not abort the others,
+            # but keep the traceback so the cause is not lost.
+            logger.warning("Failed to download %s", url, exc_info=True)
+            continue
+        if len(raw) < 100:
+            logger.warning("File too small for %s-%s, skipping", station, yr)
+            continue
+        text = gzip.decompress(raw).decode("latin-1")
+        df = pd.read_fwf(io.StringIO(text), widths=_COL_WIDTHS, header=None)
+        if fm12:
+            df = df[df.iloc[:, 11] == "FM-12"]
+        # Explicit copy: the frame is modified below and must not be a view
+        # of the filtered slice (avoids chained-assignment warnings).
+        df = df.iloc[:, list(selected)].copy()
+        df.columns = list(selected.values())
+        df["date"] = pd.to_datetime(
+            df[["year", "month", "day", "hour"]].assign(minute=0, second=0),
+            utc=True,
+        )
+        for col, sentinel in na_map.items():
+            # Work in float so missing values are NaN and the column keeps a
+            # numeric dtype instead of degrading to object.
+            values = pd.to_numeric(df[col], errors="coerce").astype("float64")
+            df[col] = values.mask(values == sentinel)
+        for col, divisor in scale.items():
+            df[col] = df[col] / divisor
+        frames.append(df)
+    if not frames:
+        return pd.DataFrame(columns=output_columns)
+    result = pd.concat(frames, ignore_index=True)[output_columns]
+    return result.sort_values("date").reset_index(drop=True)

atmofetch/noaa/stations.py ADDED Viewed

@@ -0,0 +1,95 @@
+from __future__ import annotations
+import io
+import logging
+from datetime import date, datetime
+import pandas as pd
+from atmofetch._utils.network import fetch_text
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# Text file parsed by nearest_stations_noaa() into country code / name pairs.
+_COUNTRY_LIST_URL = "https://www.ncei.noaa.gov/pub/data/noaa/country-list.txt"
+# CSV read by nearest_stations_noaa() as the station table.
+_ISD_HISTORY_URL = "https://www.ncei.noaa.gov/pub/data/noaa/isd-history.csv"
+def nearest_stations_noaa(
+    country: str,
+    date_query: date | None = None,
+    point: tuple[float, float] | None = None,
+    no_of_stations: int = 10,
+) -> pd.DataFrame:
+    """List the NOAA ISH stations of one country closest to a reference point.
+    Parameters
+    ----------
+    country : Country name (e.g. ``"UNITED KINGDOM"``); it is upper-cased
+        before being matched against the NOAA country list.
+    date_query : Day on which a station must have data.  Defaults to today.
+    point : ``(longitude, latitude)`` reference point.  If *None*, the mean
+        longitude/latitude of the matching stations is used.
+    no_of_stations : Maximum number of stations to return.
+    Returns
+    -------
+    Station metadata with a ``distance`` column, sorted by that distance.
+    Raises
+    ------
+    ValueError if the country has no stations, or none active on *date_query*.
+    """
+    def _as_date(raw: object) -> date | None:
+        # Values are parsed as YYYYMMDD; missing or malformed ones become None.
+        if not pd.notna(raw):
+            return None
+        digits = str(int(raw))  # type: ignore[call-overload]
+        if len(digits) < 8:
+            return None
+        try:
+            return datetime.strptime(digits, "%Y%m%d").date()
+        except ValueError:
+            return None
+    date_query = date.today() if date_query is None else date_query
+    country = country.upper()
+    # --- country list: two-character code followed by the name; first line skipped ---
+    listing = fetch_text(_COUNTRY_LIST_URL).strip().splitlines()[1:]
+    code_name_pairs = ((entry[:2].strip(), entry[2:].strip()) for entry in listing)
+    countries_df = pd.DataFrame(
+        [{"CTRY": code, "countries": name} for code, name in code_name_pairs if code and name]
+    )
+    # --- station history joined with country names ---
+    stations_df = pd.read_csv(io.StringIO(fetch_text(_ISD_HISTORY_URL)))
+    merged = stations_df.merge(countries_df, on="CTRY")
+    merged["Begin_date"] = merged["BEGIN"].apply(_as_date)
+    merged["End_date"] = merged["END"].apply(_as_date)
+    in_country = merged.loc[merged["countries"] == country].copy()
+    if in_country.empty:
+        raise ValueError(
+            f"No stations found for country '{country}'. "
+            "Check names at https://www.ncei.noaa.gov/pub/data/noaa/country-list.txt"
+        )
+    # Drop rows without usable dates first: comparing None with a date would fail.
+    dated = in_country[in_country["Begin_date"].notna() & in_country["End_date"].notna()]
+    active = dated[(dated["Begin_date"] <= date_query) & (dated["End_date"] >= date_query)]
+    if active.empty:
+        raise ValueError(f"No stations with data on {date_query} for country '{country}'.")
+    if point is None:
+        point = (float(active["LON"].mean()), float(active["LAT"].mean()))
+    # euclidean approximation scaled to ~km
+    lon_offset = active["LON"] - point[0]
+    lat_offset = active["LAT"] - point[1]
+    ranked = active.assign(distance=(lon_offset ** 2 + lat_offset ** 2) ** 0.5 * 112.196672)
+    return ranked.sort_values("distance").head(no_of_stations).reset_index(drop=True)

atmofetch/ogimet/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+from atmofetch.ogimet.hourly import ogimet_hourly
+from atmofetch.ogimet.daily import ogimet_daily
+from atmofetch.ogimet.dispatcher import meteo_ogimet
+from atmofetch.ogimet.stations import stations_ogimet, nearest_stations_ogimet
+# Public names of the ogimet subpackage, imported above from its modules.
+__all__ = [
+    "ogimet_hourly",
+    "ogimet_daily",
+    "meteo_ogimet",
+    "stations_ogimet",
+    "nearest_stations_ogimet",
+]