point-collocation 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ """point_collocation — point-based matchups against cloud-hosted granules.
2
+
3
+ Public API
4
+ ----------
5
+ :func:`plan`
6
+ Build a matchup plan by searching for granules that cover the given points.
7
+ :func:`matchup`
8
+ Execute a :class:`Plan` to extract dataset variables at each point.
9
+
10
+ Quick start
11
+ -----------
12
+ ::
13
+
14
+ import earthaccess
15
+ import point_collocation as pc
16
+ import pandas as pd
17
+
18
+ earthaccess.login()
19
+
20
+ df_points = pd.DataFrame({
21
+ "lat": [34.5, 35.1],
22
+ "lon": [-120.3, -119.8],
23
+ "time": pd.to_datetime(["2023-06-01", "2023-06-02"]),
24
+ })
25
+
26
+ plan = pc.plan(
27
+ df_points,
28
+ data_source="earthaccess",
29
+ source_kwargs={
30
+ "short_name": "PACE_OCI_L3M_RRS",
31
+ "granule_name": "*.DAY.*.4km.*",
32
+ },
33
+ )
34
+
35
+ # Inspect what variables are available before running the full matchup
36
+ plan.show_variables(geometry="grid")
37
+
38
+ # Open a single granule interactively
39
+ ds = plan.open_dataset(plan[0])
40
+
41
+ out = pc.matchup(plan, geometry="grid", variables=["Rrs"])
42
+
43
+ Optional xarray accessor
44
+ -------------------------
45
+ Register the ``Dataset.pc`` accessor for interactive use::
46
+
47
+ import xarray as xr
+ import point_collocation.extensions.accessor # noqa: F401
48
+
49
+ ds = xr.open_dataset(...)
50
+ out = ds.pc.extract_points(df_points, variables=["sst"])
51
+ """
52
+
53
+ from point_collocation.core.engine import matchup
54
+ from point_collocation.core.plan import Plan, plan
55
+
56
+ __all__ = ["matchup", "plan", "Plan"]
@@ -0,0 +1,16 @@
1
+ """Source adapters that normalise heterogeneous inputs into the SourceProtocol.
2
+
3
+ Built-in adapters
4
+ -----------------
5
+ earthaccess : wraps file-like objects returned by ``earthaccess.open()``
6
+
7
+ Future adapters (not yet implemented)
8
+ --------------------------------------
9
+ stac : STAC item assets
10
+ url : plain HTTPS URLs
11
+ local : local file paths
12
+ """
13
+
14
+ from point_collocation.adapters.base import SourceAdapter
15
+
16
+ __all__ = ["SourceAdapter"]
@@ -0,0 +1,33 @@
1
+ """Base class for all source adapters.
2
+
3
+ A source adapter wraps a heterogeneous input — a file-like object,
4
+ a URL, a STAC asset — and exposes the uniform
5
+ :class:`~point_collocation.core.types.SourceProtocol` interface that
6
+ the core engine consumes.
7
+
8
+ Subclasses must implement :meth:`open_dataset`.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from abc import ABC, abstractmethod
14
+
15
+
16
class SourceAdapter(ABC):
    """Common interface implemented by every source adapter.

    To support a new kind of data source, derive from this class and
    provide :meth:`open_dataset`.  That method is the only one the core
    engine calls; any other state or helper methods are private to the
    concrete adapter.
    """

    @abstractmethod
    def open_dataset(self, **kwargs: object) -> object:
        """Open this source and return it as an ``xarray.Dataset``.

        Parameters
        ----------
        **kwargs:
            Passed through unchanged to ``xarray.open_dataset``.

        Returns
        -------
        object
            The opened ``xarray.Dataset``.
        """
        # Abstract: concrete adapters must override this method.
        raise NotImplementedError  # pragma: no cover
@@ -0,0 +1,67 @@
1
+ """earthaccess adapter.
2
+
3
+ Wraps the file-like objects returned by ``earthaccess.open()`` so they
4
+ satisfy :class:`~point_collocation.core.types.SourceProtocol` and can
5
+ be consumed by the core engine without modification.
6
+
7
+ Usage
8
+ -----
9
+ ::
10
+
11
+ import earthaccess
12
+ import point_collocation as pc
13
+
14
+ plan = pc.plan(
15
+ df_points,
16
+ data_source="earthaccess",
17
+ source_kwargs={
18
+ "short_name": "PACE_OCI_L3M_RRS",
19
+ "granule_name": "*.DAY.*.4km.*",
20
+ },
21
+ )
22
+ out = pc.matchup(plan, geometry="grid", variables=["Rrs"])
23
+
24
+ Responsibilities
25
+ ----------------
26
+ * Accept a single ``earthaccess``-opened file-like object.
27
+ * Open it with ``xarray.open_dataset`` using ``engine="h5netcdf"`` by
28
+ default.
29
+ * Return the ``xarray.Dataset`` to the caller; the caller is responsible
30
+ for closing it.
31
+ """
32
+
33
+ from __future__ import annotations
34
+
35
+ import xarray as xr
36
+
37
+ from point_collocation.adapters.base import SourceAdapter
38
+
39
+
40
class EarthAccessAdapter(SourceAdapter):
    """Source adapter around one ``earthaccess.open()`` file-like object.

    Parameters
    ----------
    source:
        A single file-like object as returned by ``earthaccess.open()``.
    """

    def __init__(self, source: object) -> None:
        # Only a reference is stored; nothing is opened until
        # ``open_dataset`` is called.
        self._source = source

    def open_dataset(self, **kwargs: object) -> xr.Dataset:
        """Open the wrapped source via ``xarray.open_dataset``.

        Parameters
        ----------
        **kwargs:
            Passed on to ``xarray.open_dataset``.  If the caller does not
            supply an ``engine`` key, ``engine="h5netcdf"`` is used.

        Returns
        -------
        xarray.Dataset
        """
        # Caller-supplied options win over the default engine.
        options = {"engine": "h5netcdf", **kwargs}
        return xr.open_dataset(self._source, **options)  # type: ignore[arg-type]
@@ -0,0 +1,5 @@
1
+ """Core matchup engine — earthaccess-agnostic."""
2
+
3
+ from point_collocation.core.engine import matchup
4
+
5
+ __all__ = ["matchup"]
@@ -0,0 +1,148 @@
1
+ """Helpers for working with individual granules (source files).
2
+
3
+ Responsibilities
4
+ ----------------
5
+ * Extract a human-readable identifier from an arbitrary source object.
6
+ * Parse the temporal coverage (start/end date) from a NASA-style L3
7
+ granule filename.
8
+
9
+ Supported filename conventions
10
+ ------------------------------
11
+ ``YYYYDOY`` — single day (DOY = day-of-year, 001–366)
12
+ ``YYYYDOY_YYYYDOY`` — multi-day range (e.g., 8-day composites, monthly)
13
+ ``YYYYMMDD`` — single day in calendar format
14
+ ``YYYYMMDD_YYYYMMDD`` — multi-day range in calendar format
15
+
16
+ The period keyword embedded in the filename (``.DAY.``, ``.8D.``,
17
+ ``.MO.``) is used to infer the end date when only a start date is
18
+ present.
19
+
20
+ Examples of supported filenames
21
+ --------------------------------
22
+ * ``PACE_OCI_2024070.L3m.DAY.RRS.Rrs_412.4km.nc``
23
+ * ``PACE_OCI_2024049_2024056.L3m.8D.CHL.chlor_a.9km.nc``
24
+ * ``AQUA_MODIS.20230601.L3m.DAY.SST.sst.4km.nc``
25
+ * ``AQUA_MODIS.20230601_20230630.L3m.MO.CHL.chlor_a.9km.nc``
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import calendar
31
+ import os
32
+ import pathlib
33
+ import re
34
+ from datetime import datetime, timedelta
35
+
36
+ import pandas as pd
37
+
38
+
39
def _path_basename(value: object) -> str:
    """Return the final path component of *value*, or ``""`` if there is none.

    Understands ``pathlib`` paths, any ``os.PathLike`` whose ``__fspath__``
    yields text, and plain strings.  Every other type yields ``""``.
    """
    if isinstance(value, pathlib.PurePath):
        return value.name
    if isinstance(value, os.PathLike):
        # May produce ``bytes``; those are rejected by the ``str`` check below.
        value = os.fspath(value)
    if isinstance(value, str):
        return os.path.basename(value)
    return ""


def get_source_id(source: object) -> str:
    """Return a human-readable identifier (basename) for *source*.

    Candidates are tried in order and the first one that yields a
    non-empty basename wins:

    1. *source* itself, when it is a ``pathlib`` path, another
       ``os.PathLike`` object, or a plain ``str``
    2. the ``.path`` attribute of *source*
    3. the ``.name`` attribute of *source*
    4. ``str(source)`` as last resort

    Attribute values may be strings or path-like objects.  A candidate
    whose basename is empty (for example a string ending in a path
    separator) is skipped, so the result is never an empty string unless
    ``str(source)`` itself is empty.

    Parameters
    ----------
    source:
        A path, a path string / URL, or an object such as an opened file
        that exposes ``.path`` or ``.name``.

    Returns
    -------
    str
        The identifier for *source*.
    """
    identifier = _path_basename(source)
    if identifier:
        return identifier

    # Attributes are read lazily and in priority order so that ``.name`` is
    # only consulted when ``.path`` did not produce a usable identifier.
    for attr in ("path", "name"):
        identifier = _path_basename(getattr(source, attr, None))
        if identifier:
            return identifier

    return str(source)
58
+
59
+
60
# Filename date conventions in priority order:
# (compiled pattern, strptime format, True when the token is a start_end pair).
# In the pair patterns the second date is captured inside a lookahead so that
# it is not consumed; this lets ``finditer`` also report a pair that begins at
# the second token of an earlier, invalid pair (e.g. ``ID_START_END``).
_DATE_CONVENTIONS = (
    # DOY-format pair: YYYYDOY_YYYYDOY
    (re.compile(r"(?<!\d)(\d{7})_(?=(\d{7})(?!\d))"), "%Y%j", True),
    # Calendar-format pair: YYYYMMDD_YYYYMMDD (must start with "20…")
    (re.compile(r"(?<!\d)(20\d{6})_(?=(20\d{6})(?!\d))"), "%Y%m%d", True),
    # Single DOY date: YYYYDOY
    (re.compile(r"(?<!\d)(\d{7})(?!\d)"), "%Y%j", False),
    # Single calendar date: YYYYMMDD (must start with "20…")
    (re.compile(r"(?<!\d)(20\d{6})(?!\d)"), "%Y%m%d", False),
)


def _parse_date_token(
    match: re.Match[str], fmt: str, is_pair: bool, basename: str
) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Convert one regex hit into ``(start, end)``; raise ``ValueError`` if invalid."""
    start = datetime.strptime(match.group(1), fmt)
    if is_pair:
        end = datetime.strptime(match.group(2), fmt)
    else:
        # Only a start date is present: derive the end from the period token.
        end = _infer_end_date(start, basename)
    return pd.Timestamp(start), pd.Timestamp(end)


def parse_temporal_range(filename: str) -> tuple[pd.Timestamp, pd.Timestamp]:
    """Return ``(start, end)`` timestamps for the granule named *filename*.

    Only the basename of *filename* is examined.  The conventions are tried
    in this order: ``YYYYDOY_YYYYDOY``, ``YYYYMMDD_YYYYMMDD``, ``YYYYDOY``,
    ``YYYYMMDD``.  For the single-date conventions the end date is inferred
    from the period token in the filename.

    Digit runs that have the right length but are not valid dates are
    skipped.  The first occurrence of every convention is checked before
    any later occurrence, so a filename whose first token is valid always
    resolves to that token; later occurrences act purely as a fallback for
    names in which an unrelated number precedes the date.

    Parameters
    ----------
    filename:
        File path or basename.

    Returns
    -------
    tuple[pandas.Timestamp, pandas.Timestamp]
        Inclusive start and end dates.  The timestamps are timezone-naive
        and their time component is midnight.

    Raises
    ------
    ValueError
        If no recognisable date pattern is found in *filename*.
    """
    basename = os.path.basename(filename)

    hits = [
        (fmt, is_pair, list(pattern.finditer(basename)))
        for pattern, fmt, is_pair in _DATE_CONVENTIONS
    ]

    # Pass 1 looks at the first hit of each convention; pass 2 falls back to
    # the remaining hits.
    for window in (slice(0, 1), slice(1, None)):
        for fmt, is_pair, matches in hits:
            for match in matches[window]:
                try:
                    return _parse_date_token(match, fmt, is_pair, basename)
                except ValueError:
                    # Right shape, but not a real date: try the next candidate.
                    continue

    raise ValueError(
        f"Cannot parse temporal range from filename: {basename!r}"
    )
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Private helpers
137
+ # ---------------------------------------------------------------------------
138
+
139
+ def _infer_end_date(start: datetime, filename: str) -> datetime:
140
+ """Infer the end date from *start* and the period token in *filename*."""
141
+ upper = filename.upper()
142
+ if ".8D." in upper or ".8DAY." in upper:
143
+ return start + timedelta(days=7)
144
+ if ".MO." in upper or ".MON." in upper or ".MONTH." in upper:
145
+ last_day = calendar.monthrange(start.year, start.month)[1]
146
+ return start.replace(day=last_day)
147
+ # Default: treat as a single day (daily composite or unknown period)
148
+ return start