eolas-data 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
eolas_data/client.py ADDED
@@ -0,0 +1,333 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from typing import Optional, Union
5
+
6
+ import pandas as pd
7
+ import requests
8
+
9
+ from .dataset import Dataset
10
+ from .exceptions import APIError, AuthenticationError, NotFoundError, RateLimitError
11
+
12
+ # Imported separately so the names module is also re-exportable for users who
13
+ # want IDE autocomplete on dataset names without instantiating a Client.
14
+ from ._dataset_names import DatasetName # noqa: F401 (public re-export)
15
+
16
+
17
+ BASE_URL = "https://api.eolas.fyi"
18
+
19
+
20
+ def _to_geodataframe(df: "pd.DataFrame", force: bool = False):
21
+ """Convert a DataFrame with a ``geometry_wkt`` column to a GeoDataFrame (CRS WGS84).
22
+
23
+ Returns the GeoDataFrame on success, or ``None`` when geopandas isn't installed
24
+ (and ``force`` is False) so the caller can fall back to the plain DataFrame.
25
+ Raises ImportError when ``force=True`` but geopandas is missing.
26
+ """
27
+ try:
28
+ import geopandas as gpd
29
+ from shapely import wkt as _wkt
30
+ except ImportError:
31
+ if force:
32
+ raise ImportError(
33
+ "geopandas + shapely are required to return geospatial datasets "
34
+ "as GeoDataFrames. Install with: pip install eolas-data[geo]"
35
+ )
36
+ return None
37
+
38
+ geom = df["geometry_wkt"].apply(lambda s: _wkt.loads(s) if isinstance(s, str) and s else None)
39
+ gdf = gpd.GeoDataFrame(df.drop(columns=["geometry_wkt"]), geometry=geom, crs="EPSG:4326")
40
+ for attr in ("eolas_name", "eolas_source"):
41
+ if hasattr(df, attr):
42
+ try:
43
+ setattr(gdf, attr, getattr(df, attr))
44
+ except Exception:
45
+ pass
46
+ return gdf
47
+
48
+
49
class Client:
    """Client for the eolas.fyi statistical data API.

    Args:
        api_key: Your API key. Falls back to the ``EOLAS_API_KEY`` env var
            (or ``VS_API_KEY`` for back-compat with the legacy library).
        base_url: Override the API base URL (useful for testing).
        cache: Cache responses in memory for the lifetime of the client.
            Useful in notebooks to avoid re-fetching on re-runs.
        timeout: Optional per-request timeout in seconds, passed straight to
            ``requests``. ``None`` (default) keeps the previous behaviour of
            waiting indefinitely.

    Examples::

        from eolas_data import Client
        client = Client("your_api_key")

        # Source-specific helpers
        df = client.statsnz("nz_cpi", start="2020-01-01")
        df = client.oecd("nz_gdp")

        # Generic
        df = client.get("nz_cpi")

        # Discovery
        all_datasets = client.list()
        nz_datasets = client.list("Stats NZ")
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        base_url: str = BASE_URL,
        cache: bool = False,
        timeout: Optional[float] = None,
    ):
        self._key = api_key or os.getenv("EOLAS_API_KEY") or os.getenv("VS_API_KEY") or ""
        self._base = base_url.rstrip("/")
        # None = caching disabled; {} = cache responses for this client's lifetime.
        self._cache: dict | None = {} if cache else None
        self._timeout = timeout
        self._session = requests.Session()
        self._session.headers.update({"X-API-Key": self._key})

    def __repr__(self) -> str:
        # Mask the key so reprs are safe to paste into logs / bug reports.
        masked = self._key[:8] + "..." if len(self._key) > 8 else self._key
        cache = " cache=on" if self._cache is not None else ""
        return f"<eolas_data.Client key={masked!r}{cache}>"

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def list(self, source: Optional[str] = None) -> list[dict]:
        """Return metadata for all available datasets.

        Args:
            source: Optional filter, e.g. ``"Stats NZ"``, ``"OECD"``.
        """
        data = self._get("/v1/datasets")
        # The endpoint may return {"datasets": [...]} or a bare list.
        items = data.get("datasets", data) if isinstance(data, dict) else data
        if source:
            items = [s for s in items if s.get("source") == source]
        return items

    def info(self, name: Union[str, "DatasetName"]) -> dict:
        """Return metadata for a single dataset."""
        return self._get(f"/v1/datasets/{name}")

    # ------------------------------------------------------------------
    # Integrations (Enterprise plan only)
    # ------------------------------------------------------------------

    def integration(self, platform: str, datasets: list[str]) -> dict[str, str]:
        """Generate connector config files for a third-party data-pipeline tool.

        Enterprise plan only. Other plans receive an
        :class:`AuthenticationError` with the upgrade message in the detail.

        Args:
            platform: One of ``"meltano"``, ``"fivetran"``, ``"azure-data-factory"``.
            datasets: Dataset names to include in the generated config.

        Returns:
            ``{filename: file_contents}`` ready to write to disk.

        Examples::

            files = client.integration("meltano", ["nz_cpi", "nz_gdp"])
            for filename, content in files.items():
                (Path("./tap-eolas") / filename).write_text(content)
        """
        if not datasets:
            raise ValueError("datasets cannot be empty")
        resp = self._get(
            f"/v1/integrations/{platform}",
            params={"datasets": ",".join(datasets)},
        )
        return resp.get("files", {})

    # ------------------------------------------------------------------
    # Source-specific helpers
    # ------------------------------------------------------------------

    def statsnz(self, name, **kwargs) -> Dataset:
        """Fetch a Stats NZ dataset."""
        return self._get_source(name, "Stats NZ", **kwargs)

    def oecd(self, name, **kwargs) -> Dataset:
        """Fetch an OECD dataset."""
        return self._get_source(name, "OECD", **kwargs)

    def rbnz(self, name, **kwargs) -> Dataset:
        """Fetch an RBNZ dataset."""
        return self._get_source(name, "RBNZ", **kwargs)

    def treasury(self, name, **kwargs) -> Dataset:
        """Fetch an NZ Treasury dataset."""
        return self._get_source(name, "NZ Treasury", **kwargs)

    def linz(self, name, **kwargs) -> Dataset:
        """Fetch a LINZ dataset."""
        return self._get_source(name, "LINZ", **kwargs)

    def statsnz_geo(self, name, **kwargs) -> Dataset:
        """Fetch a Stats NZ Geospatial dataset."""
        return self._get_source(name, "Stats NZ Geospatial", **kwargs)

    def mbie(self, name, **kwargs) -> Dataset:
        """Fetch an MBIE dataset."""
        return self._get_source(name, "MBIE", **kwargs)

    def nzta(self, name, **kwargs) -> Dataset:
        """Fetch a Waka Kotahi (NZTA) dataset."""
        return self._get_source(name, "Waka Kotahi", **kwargs)

    def msd(self, name, **kwargs) -> Dataset:
        """Fetch an MSD dataset."""
        return self._get_source(name, "MSD", **kwargs)

    def police(self, name, **kwargs) -> Dataset:
        """Fetch an NZ Police / MoJ dataset."""
        return self._get_source(name, "NZ Police / MoJ", **kwargs)

    def acc(self, name, **kwargs) -> Dataset:
        """Fetch an ACC dataset."""
        return self._get_source(name, "ACC", **kwargs)

    def edcounts(self, name, **kwargs) -> Dataset:
        """Fetch an Education Counts dataset."""
        return self._get_source(name, "Education Counts", **kwargs)

    def worksafe(self, name, **kwargs) -> Dataset:
        """Fetch a WorkSafe NZ dataset."""
        return self._get_source(name, "WorkSafe NZ", **kwargs)

    def _get_source(self, name, source: str, **kwargs) -> Dataset:
        # Shared implementation for the per-source helpers: fetch, then tag
        # the frame with its source label.
        df = self.get(name, **kwargs)
        df.eolas_source = source
        return df

    # ------------------------------------------------------------------
    # Core data fetch
    # ------------------------------------------------------------------

    def get(
        self,
        name: Union[str, "DatasetName"],
        start: Optional[str] = None,
        end: Optional[str] = None,
        format: str = "json",
        engine: str = "pandas",
        limit: Optional[int] = None,
        as_geo: Optional[bool] = None,
    ) -> Dataset:
        """Fetch dataset rows as a pandas (or polars / geopandas) DataFrame.

        Args:
            name: Dataset identifier, e.g. ``"nz_cpi"``. Type-checked against
                the ``DatasetName`` Literal at static-analysis time so
                IDEs autocomplete the catalog.
            start: ISO date lower bound, e.g. ``"2020-01-01"``.
            end: ISO date upper bound, e.g. ``"2024-12-31"``.
            format: ``"json"`` (default) or ``"csv"``.
            engine: ``"pandas"`` (default) or ``"polars"``.
            limit: Max rows to return. Default ``None`` requests the full dataset
                (server enforces a 50,000-row cap on Free/Starter plans; Pro is
                unlimited). Pass an explicit integer to request fewer rows.
            as_geo: Convert geospatial datasets to a ``GeoDataFrame``.
                ``None`` (default) auto-converts when the dataset has a
                ``geometry_wkt`` column AND ``geopandas`` is importable.
                ``True`` forces the conversion (raises if geopandas missing).
                ``False`` keeps the raw WKT string column.
                Install with ``pip install eolas-data[geo]``.

        Returns:
            A :class:`Dataset` (pandas DataFrame subclass), a polars DataFrame
            when ``engine="polars"``, or a ``geopandas.GeoDataFrame`` when
            geometry is present and conversion is enabled.
        """
        params: dict = {}
        if start:
            params["start"] = start
        if end:
            params["end"] = end
        # Server-side: limit=0 means "as much as the plan allows" (full dataset for Pro,
        # 50K cap for Free/Starter). limit=None on the client maps to limit=0.
        params["limit"] = 0 if limit is None else int(limit)

        # The engine is part of the cache key — previously a cached pandas frame
        # could be returned for an engine="polars" request (and vice versa).
        cache_key = f"{name}:{start}:{end}:{format}:{engine}:{params['limit']}:{as_geo}"
        if self._cache is not None and cache_key in self._cache:
            return self._cache[cache_key]

        if format == "csv":
            from io import StringIO
            resp = self._raw_get(f"/v1/datasets/{name}/data", params={"format": "csv", **params})
            df = pd.read_csv(StringIO(resp.text))
        else:
            data = self._get(f"/v1/datasets/{name}/data", params=params)
            records = data.get("data", data) if isinstance(data, dict) else data
            df = pd.DataFrame(records)
        # Parse the date column for both formats (previously only the JSON path did).
        if "date" in df.columns:
            df["date"] = pd.to_datetime(df["date"])

        result = Dataset(df)
        result.eolas_name = name
        result.eolas_source = ""

        if engine == "polars":
            try:
                import polars as pl
            except ImportError:
                raise ImportError(
                    "polars is required for engine='polars'. "
                    "Install with: pip install eolas-data[polars]"
                )
            pl_df = pl.from_pandas(result)
            if self._cache is not None:
                self._cache[cache_key] = pl_df
            return pl_df

        # Optional geopandas conversion. When as_geo=None we auto-convert if both
        # (a) the dataset has a geometry_wkt column AND (b) geopandas is importable.
        if as_geo is not False and "geometry_wkt" in result.columns:
            converted = _to_geodataframe(result, force=as_geo is True)
            if converted is not None:
                result = converted

        if self._cache is not None:
            self._cache[cache_key] = result

        return result

    # ------------------------------------------------------------------
    # HTTP helpers
    # ------------------------------------------------------------------

    def _get(self, path: str, params: Optional[dict] = None) -> dict:
        """GET *path* and decode the JSON body."""
        return self._raw_get(path, params=params).json()

    def _raw_get(self, path: str, params: Optional[dict] = None) -> requests.Response:
        """GET *path* and raise a typed error for any non-200 response."""
        url = f"{self._base}{path}"
        resp = self._session.get(url, params=params, timeout=self._timeout)
        self._raise_for_status(resp)
        return resp

    @staticmethod
    def _raise_for_status(resp: requests.Response) -> None:
        """Map non-200 responses to the package's exception hierarchy.

        NOTE(review): only 200 is treated as success — presumably the API never
        returns other 2xx codes; confirm before relaxing.
        """
        if resp.status_code == 200:
            return
        if resp.status_code == 401:
            raise AuthenticationError("Invalid or missing API key.")
        if resp.status_code == 403:
            try:
                detail = resp.json().get("detail", "API key is inactive.")
            except Exception:
                detail = "API key is inactive."
            raise AuthenticationError(detail)
        if resp.status_code == 429:
            raise RateLimitError(
                "Monthly request limit reached. Upgrade for higher limits."
            )
        if resp.status_code == 404:
            try:
                detail = resp.json().get("detail", "Not found.")
            except Exception:
                detail = "Not found."
            raise NotFoundError(detail)
        # Fallback: surface whatever detail/body the server sent.
        try:
            detail = resp.json().get("detail", resp.text)
        except Exception:
            detail = resp.text
        raise APIError(resp.status_code, detail)
eolas_data/dataset.py ADDED
@@ -0,0 +1,66 @@
1
+ from __future__ import annotations
2
+
3
+ import pandas as pd
4
+
5
+
6
class Dataset(pd.DataFrame):
    """A pandas DataFrame with eolas dataset metadata.

    Behaves exactly like a DataFrame — all pandas operations work normally.
    Extra attributes:
        eolas_name: Dataset identifier (e.g. ``"nz_cpi"``).
        eolas_source: Data source label (e.g. ``"Stats NZ"``).
    """

    # Attributes pandas propagates through operations (slicing, copy, ...).
    _metadata = ["eolas_name", "eolas_source"]

    @property
    def _constructor(self):
        # Ensure pandas operations return Dataset, not a plain DataFrame,
        # so the metadata attributes survive.
        return Dataset

    def __repr__(self) -> str:
        name = getattr(self, "eolas_name", "") or ""
        source = getattr(self, "eolas_source", "") or ""
        if name:
            header = f"# Dataset: {name}"
            if source:
                header += f" [{source}]"
            header += f"\n# {len(self)} rows\n"
            return header + pd.DataFrame.__repr__(self)
        return pd.DataFrame.__repr__(self)

    def plot_dataset(self, ax=None, **kwargs):
        """Quick line chart using matplotlib.

        Returns the matplotlib Axes object so you can customise further.
        Requires matplotlib: ``pip install eolas-data[plot]``.

        Raises:
            ValueError: when the frame has fewer than two usable columns,
                instead of the IndexError the previous version produced.
        """
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            raise ImportError(
                "matplotlib is required for plot_dataset(). "
                "Install with: pip install eolas-data[plot]"
            )

        # Pick x: the "date" column when present, else the first column.
        date_col = "date" if "date" in self.columns else (
            self.columns[0] if len(self.columns) else None
        )
        # Pick y: the "value" column when present, else the first column that
        # is not the x-axis (never plot a column against itself).
        if "value" in self.columns:
            value_col = "value"
        else:
            candidates = [c for c in self.columns if c != date_col]
            value_col = candidates[0] if candidates else None
        if date_col is None or value_col is None:
            raise ValueError("plot_dataset() needs at least two columns (x and y)")

        if ax is None:
            _, ax = plt.subplots(figsize=(10, 4))

        ax.plot(self[date_col], self[value_col], color="#2563eb", linewidth=1.5, **kwargs)

        name = getattr(self, "eolas_name", "") or ""
        source = getattr(self, "eolas_source", "") or ""

        if name:
            ax.set_title(name, fontweight="bold", fontsize=13)
        ax.set_xlabel("")
        ax.spines[["top", "right"]].set_visible(False)

        caption = f"Source: {source} · eolas.fyi" if source else "eolas.fyi"
        ax.figure.text(0.99, 0.01, caption, ha="right", fontsize=8, color="#9ca3af")

        plt.tight_layout()
        return ax
eolas_data/exceptions.py ADDED
@@ -0,0 +1,20 @@
1
class EolasError(Exception):
    """Base exception for the eolas-data client."""


class AuthenticationError(EolasError):
    """Raised on HTTP 401/403: missing, invalid, or inactive API key."""


class RateLimitError(EolasError):
    """Raised on HTTP 429 when the monthly request quota is exhausted."""


class NotFoundError(EolasError):
    """Raised on HTTP 404: unknown dataset or endpoint."""


class APIError(EolasError):
    """Catch-all for other non-2xx responses; carries the HTTP status code.

    Attributes:
        status_code: The HTTP status code of the failed response.
        message: The raw detail text (also embedded in ``str(self)``), so
            callers no longer need to parse it back out of the string.
    """

    def __init__(self, status_code: int, message: str):
        self.status_code = status_code
        self.message = message
        super().__init__(f"HTTP {status_code}: {message}")
eolas_data/schedule.py ADDED
@@ -0,0 +1,258 @@
1
+ """Cross-platform scheduling backend for `eolas schedule add|list|remove`.
2
+
3
+ POSIX (Linux/macOS): edits the user's crontab via `crontab -l` / `crontab -`.
4
+ Windows: uses `schtasks` to create per-user scheduled tasks.
5
+
6
+ Both backends only manage entries tagged with a sentinel so the user's other
7
+ cron jobs / scheduled tasks are never touched.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import csv
12
+ import io
13
+ import platform
14
+ import re
15
+ import shlex
16
+ import shutil
17
+ import subprocess
18
+ from dataclasses import dataclass
19
+ from typing import Optional
20
+
21
# Tag appended to every crontab line we create; lets us find and remove our
# own entries without ever touching the user's unrelated cron jobs.
SENTINEL = "# eolas-schedule:"
TASK_PREFIX = "eolas-"  # Windows task name prefix

# Interval shortcut → cron expression (minute hour dom month dow). Daily/weekly/
# monthly all default to 6am because datasets typically refresh in the early
# hours; running at 6am gets the freshest data without competing for resources.
INTERVALS = {
    "hourly": "0 * * * *",
    "daily": "0 6 * * *",
    "weekly": "0 6 * * 1",  # Monday 6am
    "monthly": "0 6 1 * *",  # 1st of month, 6am
}

# Windows schtasks /sc value per interval. Custom cron exprs not supported on
# Windows backend — see _windows_add for the fallback message.
# Each value is (schtasks /sc argument, /st start time or None).
WIN_SCHED = {
    "hourly": ("HOURLY", None),
    "daily": ("DAILY", "06:00"),
    "weekly": ("WEEKLY", "06:00"),  # default day = today's weekday; we override below
    "monthly": ("MONTHLY", "06:00"),
}

# Shape-only check: exactly five whitespace-separated fields. Field contents
# are not validated — that is left to cron itself.
CRON_EXPR_RE = re.compile(r"^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s*$")
44
+
45
+
46
@dataclass
class ScheduleEntry:
    """One managed schedule entry, as returned by ``list_entries()``."""

    name: str
    schedule: str  # cron expr (POSIX) or human description (Windows)
    command: str
51
+
52
+
53
+ # ────────────────────────────────────────────────────────────────────────────
54
+ # Public API — dispatches per OS
55
+ # ────────────────────────────────────────────────────────────────────────────
56
+
57
def is_windows() -> bool:
    """True when running on a Windows host (selects the schtasks backend)."""
    system = platform.system()
    return system == "Windows"
59
+
60
+
61
def add(name: str, schedule_expr: str, command: str) -> None:
    """Register a scheduled task. `schedule_expr` is a cron expression on POSIX
    or one of {'hourly','daily','weekly','monthly'} on Windows."""
    backend = _windows_add if is_windows() else _cron_add
    backend(name, schedule_expr, command)
68
+
69
+
70
def remove(name: str) -> bool:
    """Remove a managed task. Returns True if removed, False if not found."""
    backend = _windows_remove if is_windows() else _cron_remove
    return backend(name)
75
+
76
+
77
def list_entries() -> list[ScheduleEntry]:
    """Return all managed eolas-schedule entries."""
    backend = _windows_list if is_windows() else _cron_list
    return backend()
82
+
83
+
84
def interval_to_cron(interval: str) -> str:
    """Return the cron expression for an interval shortcut. Raises on unknown."""
    try:
        return INTERVALS[interval]
    except KeyError:
        raise ValueError(
            f"unknown interval {interval!r}; expected one of {list(INTERVALS)}"
        ) from None
89
+
90
+
91
def validate_cron_expr(expr: str) -> None:
    """Basic shape check on a 5-field cron expression. Raises on invalid."""
    if CRON_EXPR_RE.match(expr):
        return
    raise ValueError(
        f"invalid cron expression {expr!r}; expected 5 fields "
        "(minute hour day-of-month month day-of-week)"
    )
98
+
99
+
100
+ # ────────────────────────────────────────────────────────────────────────────
101
+ # POSIX cron backend
102
+ # ────────────────────────────────────────────────────────────────────────────
103
+
104
+ def _crontab_available() -> bool:
105
+ return shutil.which("crontab") is not None
106
+
107
+
108
def _cron_read() -> list[str]:
    """Read the user's crontab. Returns [] when no crontab is set."""
    if not _crontab_available():
        raise RuntimeError(
            "crontab is not installed on this system. "
            "On Debian/Ubuntu: sudo apt-get install cron. On Alpine: apk add busybox-suid."
        )
    proc = subprocess.run(["crontab", "-l"], capture_output=True, text=True)
    if proc.returncode == 0:
        return proc.stdout.splitlines()
    # Some implementations exit 1 with "no crontab" — treat as empty.
    stderr_text = proc.stderr or ""
    if "no crontab" in stderr_text.lower():
        return []
    raise RuntimeError(f"crontab -l failed: {proc.stderr.strip() or proc.stdout.strip()}")
124
+
125
+
126
def _cron_write(lines: list[str]) -> None:
    """Replace the user's crontab with *lines* (normalised trailing newline)."""
    text = "\n".join(lines).rstrip() + "\n"
    result = subprocess.run(
        ["crontab", "-"], input=text, text=True, capture_output=True
    )
    if result.returncode != 0:
        raise RuntimeError(f"crontab - failed: {result.stderr.strip()}")
133
+
134
+
135
def _cron_format_line(name: str, cron_expr: str, command: str) -> str:
    """Render one crontab line: schedule, command, then the sentinel tag + name."""
    return " ".join((cron_expr, command, SENTINEL, name))
137
+
138
+
139
def _cron_match_name(line: str, name: str) -> bool:
    """True when *line* is a managed entry tagged with exactly *name*.

    Compares the full text after the sentinel instead of ``endswith(name)``,
    so a job named ``"daily"`` no longer matches (and could be removed along
    with) one named ``"nz-daily"``.
    """
    if SENTINEL not in line:
        return False
    _, _, tail = line.partition(SENTINEL)
    return tail.strip() == name
141
+
142
+
143
def _cron_add(name: str, cron_expr: str, command: str) -> None:
    """Append (or replace) the managed cron entry for *name*."""
    validate_cron_expr(cron_expr)
    # Drop any existing entry with this name first so re-adding is idempotent.
    kept = [entry for entry in _cron_read() if not _cron_match_name(entry, name)]
    kept.append(_cron_format_line(name, cron_expr, command))
    _cron_write(kept)
148
+
149
+
150
def _cron_remove(name: str) -> bool:
    """Drop the managed entry for *name*; True when something was removed."""
    before = _cron_read()
    after = [entry for entry in before if not _cron_match_name(entry, name)]
    if len(after) == len(before):
        return False
    _cron_write(after)
    return True
157
+
158
+
159
def _cron_list() -> list[ScheduleEntry]:
    """Parse the managed entries out of the current crontab."""
    entries: list[ScheduleEntry] = []
    for raw in _cron_read():
        if SENTINEL not in raw:
            continue
        head, _, tail = raw.partition(SENTINEL)
        # First five tokens are the cron schedule; the sixth is the command
        # (maxsplit keeps any spaces inside the command intact).
        pieces = head.strip().split(maxsplit=5)
        if len(pieces) < 6:
            continue  # malformed; skip silently
        entries.append(ScheduleEntry(
            name=tail.strip(),
            schedule=" ".join(pieces[:5]),
            command=pieces[5],
        ))
    return entries
173
+
174
+
175
+ # ────────────────────────────────────────────────────────────────────────────
176
+ # Windows schtasks backend
177
+ # ────────────────────────────────────────────────────────────────────────────
178
+
179
+ def _schtasks_available() -> bool:
180
+ return shutil.which("schtasks") is not None
181
+
182
+
183
def _windows_add(name: str, interval: str, command: str) -> None:
    """Create (or overwrite) a per-user scheduled task via schtasks."""
    if not _schtasks_available():
        raise RuntimeError("schtasks not found — required on Windows for scheduling")
    try:
        sc, start_time = WIN_SCHED[interval]
    except KeyError:
        raise ValueError(
            f"Windows backend supports interval shortcuts only "
            f"({list(WIN_SCHED)}); got {interval!r}. "
            "Custom cron expressions aren't translatable; use schtasks GUI for advanced cases."
        ) from None
    cmd = [
        "schtasks", "/create",
        "/tn", f"{TASK_PREFIX}{name}",
        "/tr", command,
        "/sc", sc,
        "/f",  # overwrite if exists (idempotent add)
    ]
    if start_time:
        cmd.extend(["/st", start_time])
    if interval == "weekly":
        cmd.extend(["/d", "MON"])
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"schtasks /create failed: {result.stderr.strip()}")
207
+
208
+
209
def _windows_remove(name: str) -> bool:
    """Delete the managed task; True when it existed, False when not found."""
    result = subprocess.run(
        ["schtasks", "/delete", "/tn", f"{TASK_PREFIX}{name}", "/f"],
        capture_output=True, text=True,
    )
    if result.returncode == 0:
        return True
    # schtasks returns non-zero if the task doesn't exist
    combined = (result.stderr + result.stdout).lower()
    if "cannot find" in combined:
        return False
    raise RuntimeError(f"schtasks /delete failed: {result.stderr.strip()}")
220
+
221
+
222
def _windows_list() -> list[ScheduleEntry]:
    """List managed tasks by parsing the verbose CSV output of schtasks /query."""
    result = subprocess.run(
        ["schtasks", "/query", "/fo", "CSV", "/v"],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f"schtasks /query failed: {result.stderr.strip()}")
    entries: list[ScheduleEntry] = []
    for row in csv.DictReader(io.StringIO(result.stdout)):
        full_name = (row.get("TaskName") or "").lstrip("\\").strip()
        if not full_name.startswith(TASK_PREFIX):
            continue
        entries.append(ScheduleEntry(
            name=full_name[len(TASK_PREFIX):],
            schedule=row.get("Schedule Type") or "",
            command=row.get("Task To Run") or "",
        ))
    return entries
241
+
242
+
243
+ # ────────────────────────────────────────────────────────────────────────────
244
+ # Helpers used by cli.py
245
+ # ────────────────────────────────────────────────────────────────────────────
246
+
247
def build_command(eolas_path: str, dataset: str, out_path: str,
                  start: Optional[str] = None, end: Optional[str] = None,
                  fmt: str = "csv") -> str:
    """Construct the shell command line to put inside the cron entry."""
    tokens = [
        shlex.quote(eolas_path), "get", shlex.quote(dataset),
        "--format", shlex.quote(fmt),
        "--out", shlex.quote(str(out_path)),
    ]
    for flag, value in (("--start", start), ("--end", end)):
        if value:
            tokens.extend([flag, shlex.quote(value)])
    return " ".join(tokens)