PyPI - point-collocation - Versions diffs - 0.1.0__py3-none-any.whl - Mend

point-collocation 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

point_collocation/__init__.py +56 -0
point_collocation/adapters/__init__.py +16 -0
point_collocation/adapters/base.py +33 -0
point_collocation/adapters/earthaccess.py +67 -0
point_collocation/core/__init__.py +5 -0
point_collocation/core/_granule.py +148 -0
point_collocation/core/engine.py +546 -0
point_collocation/core/plan.py +1035 -0
point_collocation/core/types.py +37 -0
point_collocation/diagnostics/__init__.py +17 -0
point_collocation/diagnostics/report.py +111 -0
point_collocation/extensions/__init__.py +11 -0
point_collocation/extensions/accessor.py +66 -0
point_collocation/extensions/qa.py +38 -0
point_collocation/extensions/spatial.py +38 -0
point_collocation-0.1.0.dist-info/METADATA +246 -0
point_collocation-0.1.0.dist-info/RECORD +19 -0
point_collocation-0.1.0.dist-info/WHEEL +4 -0
point_collocation-0.1.0.dist-info/licenses/LICENSE +201 -0

point_collocation/__init__.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""point_collocation — point-based matchups against cloud-hosted granules.
+Public API
+----------
+:func:`plan`
+    Build a matchup plan by searching for granules that cover the given points.
+:func:`matchup`
+    Execute a :class:`Plan` to extract dataset variables at each point.
+Quick start
+-----------
+::
+    import earthaccess
+    import point_collocation as pc
+    import pandas as pd
+    earthaccess.login()
+    df_points = pd.DataFrame({
+        "lat": [34.5, 35.1],
+        "lon": [-120.3, -119.8],
+        "time": pd.to_datetime(["2023-06-01", "2023-06-02"]),
+    })
+    plan = pc.plan(
+        df_points,
+        data_source="earthaccess",
+        source_kwargs={
+            "short_name": "PACE_OCI_L3M_RRS",
+            "granule_name": "*.DAY.*.4km.*",
+        },
+    )
+    # Inspect what variables are available before running the full matchup
+    plan.show_variables(geometry="grid")
+    # Open a single granule interactively
+    ds = plan.open_dataset(plan[0])
+    out = pc.matchup(plan, geometry="grid", variables=["Rrs"])
+Optional xarray accessor
+-------------------------
+Register the ``Dataset.pc`` accessor for interactive use::
+    import point_collocation.extensions.accessor  # noqa: F401
+    ds = xr.open_dataset(...)
+    out = ds.pc.extract_points(df_points, variables=["sst"])
+"""
+from point_collocation.core.engine import matchup
+from point_collocation.core.plan import Plan, plan
+__all__ = ["matchup", "plan", "Plan"]

point_collocation/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,16 @@
+"""Source adapters that normalise heterogeneous inputs into the SourceProtocol.
+Built-in adapters
+-----------------
+earthaccess  : wraps file-like objects returned by ``earthaccess.open()``
+Future adapters (not yet implemented)
+--------------------------------------
+stac         : STAC item assets
+url          : plain HTTPS URLs
+local        : local file paths
+"""
+from point_collocation.adapters.base import SourceAdapter
+__all__ = ["SourceAdapter"]

point_collocation/adapters/base.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Base class for all source adapters.
+A source adapter wraps a heterogeneous input — a file-like object,
+a URL, a STAC asset — and exposes the uniform
+:class:`~point_collocation.core.types.SourceProtocol` interface that
+the core engine consumes.
+Subclasses must implement :meth:`open_dataset`.
+"""
+from __future__ import annotations
+from abc import ABC, abstractmethod
+class SourceAdapter(ABC):
+    """Abstract base for source adapters.
+    Subclass this to add support for a new data source.  The core
+    engine only calls :meth:`open_dataset`; everything else is internal
+    to the adapter.
+    """
+    @abstractmethod
+    def open_dataset(self, **kwargs: object) -> object:
+        """Return an ``xarray.Dataset`` for this source.
+        Parameters
+        ----------
+        **kwargs:
+            Forwarded verbatim to ``xarray.open_dataset``.
+        """
+        raise NotImplementedError  # pragma: no cover

point_collocation/adapters/earthaccess.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""earthaccess adapter.
+Wraps the file-like objects returned by ``earthaccess.open()`` so they
+satisfy :class:`~point_collocation.core.types.SourceProtocol` and can
+be consumed by the core engine without modification.
+Usage
+-----
+::
+    import earthaccess
+    import point_collocation as pc
+    plan = pc.plan(
+        df_points,
+        data_source="earthaccess",
+        source_kwargs={
+            "short_name": "PACE_OCI_L3M_RRS",
+            "granule_name": "*.DAY.*.4km.*",
+        },
+    )
+    out = pc.matchup(plan, geometry="grid", variables=["Rrs"])
+Responsibilities
+----------------
+* Accept a single ``earthaccess``-opened file-like object.
+* Open it with ``xarray.open_dataset`` using ``engine="h5netcdf"`` by
+  default.
+* Return the ``xarray.Dataset`` to the caller; the caller is responsible
+  for closing it.
+"""
+from __future__ import annotations
+import xarray as xr
+from point_collocation.adapters.base import SourceAdapter
+class EarthAccessAdapter(SourceAdapter):
+    """Adapter for ``earthaccess.open()`` file-like objects.
+    Parameters
+    ----------
+    source:
+        A single file-like object as returned by ``earthaccess.open()``.
+    """
+    def __init__(self, source: object) -> None:
+        self._source = source
+    def open_dataset(self, **kwargs: object) -> xr.Dataset:
+        """Open the underlying source with ``xarray.open_dataset``.
+        Parameters
+        ----------
+        **kwargs:
+            Forwarded to ``xarray.open_dataset``.  Defaults to
+            ``engine="h5netcdf"`` when no ``engine`` key is provided.
+        Returns
+        -------
+        xarray.Dataset
+        """
+        if "engine" not in kwargs:
+            kwargs["engine"] = "h5netcdf"
+        return xr.open_dataset(self._source, **kwargs)  # type: ignore[arg-type]

point_collocation/core/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Core matchup engine — earthaccess-agnostic."""
+from point_collocation.core.engine import matchup
+__all__ = ["matchup"]

point_collocation/core/_granule.py ADDED Viewed

@@ -0,0 +1,148 @@
+"""Helpers for working with individual granules (source files).
+Responsibilities
+----------------
+* Extract a human-readable identifier from an arbitrary source object.
+* Parse the temporal coverage (start/end date) from a NASA-style L3
+  granule filename.
+Supported filename conventions
+------------------------------
+``YYYYDOY``             — single day (DOY = day-of-year, 001–366)
+``YYYYDOY_YYYYDOY``     — multi-day range (e.g., 8-day composites, monthly)
+``YYYYMMDD``            — single day in calendar format
+``YYYYMMDD_YYYYMMDD``   — multi-day range in calendar format
+The period keyword embedded in the filename (``.DAY.``, ``.8D.``,
+``.MO.``) is used to infer the end date when only a start date is
+present.
+Examples of supported filenames
+--------------------------------
+* ``PACE_OCI_2024070.L3m.DAY.RRS.Rrs_412.4km.nc``
+* ``PACE_OCI_2024049_2024056.L3m.8D.CHL.chlor_a.9km.nc``
+* ``AQUA_MODIS.20230601.L3m.DAY.SST.sst.4km.nc``
+* ``AQUA_MODIS.20230601_20230630.L3m.MO.CHL.chlor_a.9km.nc``
+"""
+from __future__ import annotations
+import calendar
+import os
+import pathlib
+import re
+from datetime import datetime, timedelta
+import pandas as pd
+def get_source_id(source: object) -> str:
+    """Return a human-readable identifier (basename) for *source*.
+    Tries, in order:
+    1. ``pathlib.Path`` → ``path.name``
+    2. Plain ``str`` → ``os.path.basename(source)``
+    3. Object with a ``.path`` or ``.name`` string attribute
+    4. ``str(source)`` as last resort
+    """
+    if isinstance(source, pathlib.Path):
+        return source.name
+    if isinstance(source, str):
+        return os.path.basename(source)
+    for attr in ("path", "name"):
+        val = getattr(source, attr, None)
+        if isinstance(val, str) and val:
+            return os.path.basename(val)
+    return str(source)
+def parse_temporal_range(filename: str) -> tuple[pd.Timestamp, pd.Timestamp]:
+    """Return ``(start, end)`` timestamps for the granule named *filename*.
+    Only the basename of *filename* is examined.
+    Parameters
+    ----------
+    filename:
+        File path or basename.
+    Returns
+    -------
+    tuple[pandas.Timestamp, pandas.Timestamp]
+        Inclusive start and end dates (time component is midnight UTC).
+    Raises
+    ------
+    ValueError
+        If no recognisable date pattern is found in *filename*.
+    """
+    basename = os.path.basename(filename)
+    # ------------------------------------------------------------------
+    # DOY-format pair:  YYYYDOY_YYYYDOY
+    # ------------------------------------------------------------------
+    m = re.search(r"(?<!\d)(\d{7})_(\d{7})(?!\d)", basename)
+    if m:
+        try:
+            start = datetime.strptime(m.group(1), "%Y%j")
+            end = datetime.strptime(m.group(2), "%Y%j")
+            return pd.Timestamp(start), pd.Timestamp(end)
+        except ValueError:
+            pass
+    # ------------------------------------------------------------------
+    # Calendar-format pair:  YYYYMMDD_YYYYMMDD
+    # ------------------------------------------------------------------
+    m = re.search(r"(?<!\d)(20\d{6})_(20\d{6})(?!\d)", basename)
+    if m:
+        try:
+            start = datetime.strptime(m.group(1), "%Y%m%d")
+            end = datetime.strptime(m.group(2), "%Y%m%d")
+            return pd.Timestamp(start), pd.Timestamp(end)
+        except ValueError:
+            pass
+    # ------------------------------------------------------------------
+    # Single DOY date:  YYYYDOY
+    # ------------------------------------------------------------------
+    m = re.search(r"(?<!\d)(\d{7})(?!\d)", basename)
+    if m:
+        try:
+            start = datetime.strptime(m.group(1), "%Y%j")
+            end = _infer_end_date(start, basename)
+            return pd.Timestamp(start), pd.Timestamp(end)
+        except ValueError:
+            pass
+    # ------------------------------------------------------------------
+    # Single calendar date:  YYYYMMDD (must start with "20…")
+    # ------------------------------------------------------------------
+    m = re.search(r"(?<!\d)(20\d{6})(?!\d)", basename)
+    if m:
+        try:
+            start = datetime.strptime(m.group(1), "%Y%m%d")
+            end = _infer_end_date(start, basename)
+            return pd.Timestamp(start), pd.Timestamp(end)
+        except ValueError:
+            pass
+    raise ValueError(
+        f"Cannot parse temporal range from filename: {basename!r}"
+    )
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+def _infer_end_date(start: datetime, filename: str) -> datetime:
+    """Infer the end date from *start* and the period token in *filename*."""
+    upper = filename.upper()
+    if ".8D." in upper or ".8DAY." in upper:
+        return start + timedelta(days=7)
+    if ".MO." in upper or ".MON." in upper or ".MONTH." in upper:
+        last_day = calendar.monthrange(start.year, start.month)[1]
+        return start.replace(day=last_day)
+    # Default: treat as a single day (daily composite or unknown period)
+    return start