fluvilog 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
fluvilog/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ """fluvilog — near-real-time readings from Hamburg's water quality network (WGMN)."""
2
+
3
+ from .wgmn import fetch, fetch_history
4
+
5
+ __all__ = ["fetch", "fetch_history"]
fluvilog/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Entry point for `python -m fluvilog`."""
2
+
3
+ from .cli import main
4
+
5
+ if __name__ == "__main__":
6
+ raise SystemExit(main())
@@ -0,0 +1,9 @@
1
+ """Optional HTTP read API (requires the [api] extra: FastAPI + uvicorn).
2
+
3
+ Importing this package pulls in FastAPI; the base CLI keeps it out of the import
4
+ graph and only loads it inside the serve-api handler.
5
+ """
6
+
7
+ from .app import create_app
8
+
9
+ __all__ = ["create_app"]
fluvilog/api/app.py ADDED
@@ -0,0 +1,107 @@
1
+ """FastAPI application factory for the optional HTTP read API.
2
+
3
+ Three GET endpoints: two over stored readings plus one for the station catalogue. Build with
4
+ create_app; the `fluvilog serve-api` subcommand runs it under uvicorn. Each
5
+ request gets its own read-only Storage, so reads never touch the poller's writer
6
+ connection or its schema.
7
+ """
8
+
9
+ # Route handlers are registered by the @app.get decorators' side effect, which
10
+ # the type checker can't see; without this it flags them as unused.
11
+ # pyright: reportUnusedFunction=false
12
+
13
+ from collections.abc import Iterator
14
+ from dataclasses import asdict
15
+ from datetime import datetime, timedelta
16
+ from typing import Annotated
17
+ from zoneinfo import ZoneInfo
18
+
19
+ from fastapi import Depends, FastAPI, HTTPException, Query
20
+ from fastapi.middleware.cors import CORSMiddleware
21
+
22
+ from .. import catalogue
23
+ from ..constants import BERLIN_TZ, MAX_WINDOW_DAYS, PARAMETERS, STATIONS
24
+ from ..storage import SqliteStorage
25
+ from .schemas import ReadingOut, StationOut
26
+
27
+ _BERLIN = ZoneInfo(BERLIN_TZ)
28
+
29
+
30
def _as_berlin(value: datetime) -> datetime:
    """Return `value` as a tz-aware datetime in the Europe/Berlin zone.

    A naive value is taken to already be Berlin wall-clock time and is only
    tagged with the zone; an aware value is converted into the zone.
    """
    is_naive = value.tzinfo is None
    return value.replace(tzinfo=_BERLIN) if is_naive else value.astimezone(_BERLIN)
35
+
36
+
37
def _validate_filters(station: list[str] | None, parameter: list[str] | None) -> None:
    """Raise HTTP 422 if a requested station code or parameter name is unknown.

    A filter that is None or empty passes. Stations are checked first, so a
    request with both kinds of error reports the station codes.
    """

    def _not_in(requested: list[str] | None, known) -> list[str]:
        # Keeps request order so the error message mirrors the query string.
        return [item for item in (requested or []) if item not in known]

    bad_stations = _not_in(station, STATIONS)
    if bad_stations:
        raise HTTPException(422, f"unknown station code(s): {bad_stations}")

    bad_parameters = _not_in(parameter, PARAMETERS)
    if bad_parameters:
        raise HTTPException(422, f"unknown parameter name(s): {bad_parameters}")
45
+
46
+
47
def _validate_window(start: datetime, end: datetime) -> None:
    """Raise HTTP 422 if the window is reversed or wider than MAX_WINDOW_DAYS.

    A zero-length window (start == end) and a window of exactly
    MAX_WINDOW_DAYS are both accepted.
    """
    if end < start:
        raise HTTPException(422, "'from' must not be after 'to'")

    span = end - start
    widest = timedelta(days=MAX_WINDOW_DAYS)
    if span > widest:
        raise HTTPException(422, f"window exceeds the {MAX_WINDOW_DAYS}-day limit")
53
+
54
+
55
def create_app(*, db_path: str, allowed_origins: list[str]) -> FastAPI:
    """Assemble the read-only FastAPI application for one SQLite database.

    allowed_origins is handed to the CORS middleware, which is limited to GET;
    with an empty list no cross-origin request is permitted. The readings
    endpoints open db_path through SqliteStorage.open_readonly for each request
    and close it afterwards.
    """
    api = FastAPI(title="fluvilog API", version="0.1.0")
    api.add_middleware(
        CORSMiddleware,
        allow_origins=allowed_origins,
        allow_methods=["GET"],
        allow_headers=["*"],
    )

    def get_storage() -> Iterator[SqliteStorage]:
        # Generator dependency: the code after the yield runs once the request
        # is done, so the connection is closed even if the handler raised.
        storage = SqliteStorage.open_readonly(db_path)
        try:
            yield storage
        finally:
            storage.close()

    def _to_wire(records) -> list[ReadingOut]:
        # Internal dataclass records -> public response models.
        return [ReadingOut(**asdict(record)) for record in records]

    @api.get("/api/stations")
    def get_stations() -> list[StationOut]:
        return [StationOut(**asdict(entry)) for entry in catalogue.stations()]

    @api.get("/api/readings/latest")
    def get_latest(
        store: Annotated[SqliteStorage, Depends(get_storage)],
        station: Annotated[list[str] | None, Query()] = None,
        parameter: Annotated[list[str] | None, Query()] = None,
    ) -> list[ReadingOut]:
        _validate_filters(station, parameter)
        return _to_wire(
            store.latest_readings(station_codes=station, parameters=parameter)
        )

    @api.get("/api/readings")
    def get_readings(
        store: Annotated[SqliteStorage, Depends(get_storage)],
        start: Annotated[datetime, Query(alias="from")],
        end: Annotated[datetime | None, Query(alias="to")] = None,
        station: Annotated[list[str] | None, Query()] = None,
        parameter: Annotated[list[str] | None, Query()] = None,
    ) -> list[ReadingOut]:
        _validate_filters(station, parameter)
        window_start = _as_berlin(start)
        # An omitted 'to' means "up to now".
        if end is None:
            window_end = datetime.now(_BERLIN)
        else:
            window_end = _as_berlin(end)
        _validate_window(window_start, window_end)
        return _to_wire(
            store.readings_in_window(
                window_start, window_end, station_codes=station, parameters=parameter
            )
        )

    return api
@@ -0,0 +1,29 @@
1
+ """Public, validated HTTP response models.
2
+
3
+ Pydantic shapes for the wire contract, kept separate from records.py so the HTTP
4
+ surface can evolve independently of the internal storage records.
5
+ """
6
+
7
+ from datetime import datetime
8
+
9
+ from pydantic import BaseModel
10
+
11
+
12
class StationOut(BaseModel):
    """Response model for one station in the catalogue.

    latitude/longitude are WGS84 degrees.
    """

    code: str  # station code, e.g. "BL"
    name: str  # station name, e.g. "Blankenese"
    water_body: str  # body of water the station sits on, e.g. "Elbe"
    latitude: float
    longitude: float
20
+
21
+
22
class ReadingOut(BaseModel):
    """Response model for a single reading.

    timestamp serialises as ISO 8601 with offset.
    """

    station_code: str
    parameter: str  # parameter display name, e.g. "Wassertemperatur"
    unit: str
    timestamp: datetime
    value: float | None  # None is allowed and passed through, not dropped
fluvilog/catalogue.py ADDED
@@ -0,0 +1,34 @@
1
+ """Station reference data: the catalogue joining names with coordinates.
2
+
3
+ Source of truth is constants (STATIONS + STATION_COORDS), not the DB stations
4
+ table — that table exists only for the readings foreign key.
5
+ """
6
+
7
+ from .constants import STATION_COORDS, STATIONS
8
+ from .records import StationRecord
9
+
10
+
11
def _check_coords() -> None:
    """Verify that every station code in STATIONS has coordinates.

    Raises:
        ValueError: listing, in sorted order, the codes that have no
            STATION_COORDS entry.
    """
    without_coords = sorted(code for code in STATIONS if code not in STATION_COORDS)
    if not without_coords:
        return
    raise ValueError(
        f"STATION_COORDS is missing coordinates for: {without_coords}"
    )
18
+
19
+
20
+ _check_coords()
21
+
22
+
23
def stations() -> list[StationRecord]:
    """Build the station catalogue, ordered by station code.

    Each record joins the name and water body from STATIONS with the
    (latitude, longitude) pair from STATION_COORDS.
    """
    catalogue: list[StationRecord] = []
    for code, (name, water_body) in sorted(STATIONS.items()):
        position = STATION_COORDS[code]  # stored as (lat, lon)
        catalogue.append(
            StationRecord(
                code=code,
                name=name,
                water_body=water_body,
                latitude=position[0],
                longitude=position[1],
            )
        )
    return catalogue
fluvilog/cli.py ADDED
@@ -0,0 +1,214 @@
1
+ """Near-real-time readings from Hamburg's water quality network (WGMN).
2
+
3
+ Polls the public HamburgService platform (Wassergüte-Auskunft) at
4
+ serviceportal.hamburg.de and, by default, stores readings continuously.
5
+
6
+ Usage:
7
+ fluvilog # collect: poll and store (default subcommand)
8
+ fluvilog collect --station BL SH # collect only specific stations
9
+ fluvilog collect --db water.db --interval 10m
10
+ fluvilog once # one-shot fetch and print
11
+ fluvilog once --csv values.csv # ... and write to CSV
12
+ fluvilog list # list known stations
13
+ fluvilog serve-api # serve the HTTP read API (needs [api] extra)
14
+ """
15
+
16
+ import argparse
17
+ import logging
18
+ import sqlite3
19
+ import sys
20
+
21
+ import pandas as pd
22
+ import requests
23
+
24
+ from .constants import (
25
+ DEFAULT_API_HOST,
26
+ DEFAULT_API_PORT,
27
+ DEFAULT_DB_PATH,
28
+ DEFAULT_INTERVAL,
29
+ DEFAULT_PARAMETERS,
30
+ STATIONS,
31
+ )
32
+ from .service import collect, parse_interval
33
+ from .storage import IncompatibleSchemaError, SqliteStorage
34
+ from .wgmn import fetch
35
+
36
+ _COMMANDS = {"collect", "once", "list", "serve-api"}
37
+
38
+
39
def resolve_codes(selectors: list[str] | None) -> list[str]:
    """Translate --station arguments (code or name, case-insensitive) to codes.

    With no selectors, every known station code is returned. A selector that
    matches neither a code nor a name is reported on stderr and skipped, so the
    result may be shorter than the input (or empty). A station selected more
    than once (e.g. by code and by name) appears only once, at the position of
    its first mention.
    """
    if not selectors:
        return list(STATIONS)
    by_name = {name.casefold(): code for code, (name, _) in STATIONS.items()}
    # dict keys give order-preserving de-duplication.
    codes: dict[str, None] = {}
    for sel in selectors:
        as_code = sel.upper()
        as_name = sel.casefold()
        if as_code in STATIONS:
            codes[as_code] = None
        elif as_name in by_name:
            codes[by_name[as_name]] = None
        else:
            print(f" ! unknown station: {sel!r} (list shows all)", file=sys.stderr)
    return list(codes)
53
+
54
+
55
def _run_list() -> int:
    """Write the known stations (code, name, water body) to stdout; return 0."""
    lines = ["# Known WGMN stations:"]
    lines.extend(
        f" {code} {name} ({water_body})"
        for code, (name, water_body) in STATIONS.items()
    )
    print("\n".join(lines))
    return 0
61
+
62
+
63
def _run_once(args: argparse.Namespace) -> int:
    """Fetch the latest values once, print them, and optionally write CSV.

    Reads args.station (selectors, may be None) and args.csv (output path, may
    be None).

    Returns:
        0 on success; 2 if no selector resolved to a known station; 1 on a
        network/HTTP error, when no measurements came back, or when the CSV
        file could not be written.
    """
    codes = resolve_codes(args.station)
    if not codes:
        return 2

    try:
        df = fetch(codes, DEFAULT_PARAMETERS)
    except requests.RequestException as e:
        print(f"Network/HTTP error: {e}", file=sys.stderr)
        return 1

    if df.empty:
        print("No measurements available.", file=sys.stderr)
        return 1

    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 200)
    print(df.to_string(index=False))
    latest = df["timestamp"].max()
    print(
        f"\n{len(df)} measurement(s) from {df['station'].nunique()} station(s), "
        f"latest: {latest:%d.%m.%Y %H:%M}."
    )

    if args.csv:
        # An unwritable path (missing directory, permissions, ...) is reported
        # like the other failures instead of surfacing as a traceback.
        try:
            df.to_csv(args.csv, index=False)
        except OSError as e:
            print(f"Could not write CSV {args.csv!r}: {e}", file=sys.stderr)
            return 1
        print(f"Saved: {args.csv}")
    return 0
92
+
93
+
94
def _run_collect(args: argparse.Namespace) -> int:
    """Open the SQLite store at args.db and run the poll-and-store loop.

    Returns:
        collect()'s exit code; 2 if no selector resolved to a known station;
        1 if the database has an incompatible schema or raises an SQLite error.
    """
    station_codes = resolve_codes(args.station)
    if len(station_codes) == 0:
        return 2

    try:
        with SqliteStorage(args.db) as storage:
            return collect(station_codes, DEFAULT_PARAMETERS, storage, args.interval)
    except IncompatibleSchemaError as err:
        # The exception text is shown to the user as-is.
        print(str(err), file=sys.stderr)
        return 1
    except sqlite3.Error as err:
        print(f"Database error: {err}", file=sys.stderr)
        return 1
109
+
110
+
111
def _run_serve_api(args: argparse.Namespace) -> int:
    """Run the HTTP read API under uvicorn on args.host:args.port.

    uvicorn and the API package are imported here rather than at module level
    so that the rest of the CLI works without the optional [api] extra. If the
    import fails, an install hint is printed to stderr and 1 is returned;
    otherwise 0 is returned after uvicorn.run comes back.
    """
    try:
        import uvicorn

        from .api import create_app
    except ImportError:
        hint = (
            "The HTTP API needs the optional dependencies. "
            "Install them with: pip install 'fluvilog[api]'"
        )
        print(hint, file=sys.stderr)
        return 1
    else:
        application = create_app(db_path=args.db, allowed_origins=args.cors_origin)
        uvicorn.run(application, host=args.host, port=args.port)
        return 0
131
+
132
+
133
def _interval_arg(text: str) -> float:
    """argparse `type=` adapter for parse_interval that keeps its error text.

    argparse replaces the message of a plain ValueError with a generic
    "invalid ... value" line; ArgumentTypeError messages are shown verbatim.
    """
    try:
        return parse_interval(text)
    except ValueError as e:
        raise argparse.ArgumentTypeError(str(e)) from e


def main(argv: list[str] | None = None) -> int:
    """Entry point. Bare invocation (no subcommand) runs `collect`.

    Args:
        argv: Command-line arguments without the program name; None means
            sys.argv[1:].

    Returns:
        The exit code of the selected subcommand. Argument errors do not
        return: argparse exits the process itself.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        stream=sys.stderr,
    )
    argv = sys.argv[1:] if argv is None else list(argv)
    # Anything that is not a subcommand or a help flag is treated as options
    # for the default subcommand.
    if not argv or (argv[0] not in _COMMANDS and argv[0] not in {"-h", "--help"}):
        argv = ["collect", *argv]

    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    sub = ap.add_subparsers(dest="command", required=True)

    p_collect = sub.add_parser("collect", help="Continuously fetch and store (default)")
    p_collect.add_argument(
        "--station", nargs="+", metavar="CODE/NAME", help="Only these stations"
    )
    p_collect.add_argument(
        "--db",
        metavar="PATH",
        default=DEFAULT_DB_PATH,
        help=f"SQLite database path (default: {DEFAULT_DB_PATH})",
    )
    p_collect.add_argument(
        "--interval",
        type=_interval_arg,
        default=float(DEFAULT_INTERVAL),
        metavar="DURATION",
        help=f"Poll interval, e.g. 30s/10m/1h (default: {DEFAULT_INTERVAL}s)",
    )

    p_once = sub.add_parser("once", help="One-shot fetch and print")
    p_once.add_argument(
        "--station", nargs="+", metavar="CODE/NAME", help="Only these stations"
    )
    p_once.add_argument("--csv", metavar="PATH", help="Write result to CSV")

    sub.add_parser("list", help="List known stations and exit")

    p_api = sub.add_parser(
        "serve-api", help="Serve the HTTP read API (needs the [api] extra)"
    )
    p_api.add_argument(
        "--db",
        metavar="PATH",
        default=DEFAULT_DB_PATH,
        help=f"SQLite database path (default: {DEFAULT_DB_PATH})",
    )
    p_api.add_argument(
        "--host",
        metavar="HOST",
        default=DEFAULT_API_HOST,
        help=f"Bind host (default: {DEFAULT_API_HOST})",
    )
    p_api.add_argument(
        "--port",
        type=int,
        metavar="PORT",
        default=DEFAULT_API_PORT,
        help=f"Bind port (default: {DEFAULT_API_PORT})",
    )
    p_api.add_argument(
        "--cors-origin",
        action="append",
        default=[],
        metavar="ORIGIN",
        help="Allowed CORS origin; repeatable (default: none)",
    )

    args = ap.parse_args(argv)

    if args.command == "list":
        return _run_list()
    if args.command == "once":
        return _run_once(args)
    if args.command == "serve-api":
        return _run_serve_api(args)
    return _run_collect(args)
fluvilog/constants.py ADDED
@@ -0,0 +1,87 @@
1
+ """Static configuration: endpoint, request limits, and the station/parameter
2
+ catalogues of the HamburgService water quality endpoint."""
3
+
4
+ # Service endpoint (ASP.NET WebForms "Wassergüte-Auskunft").
5
+ BASE = "https://serviceportal.hamburg.de/HamburgGateway"
6
+ START = f"{BASE}/Service/StartService/WGMN?linkId=0&ars=020000000000"
7
+ PFX = "GatewayMaster:ContentSection:wucStationenAuswahlListe1:"
8
+ ENCODING = "windows-1252"
9
+ TIMEOUT = 60
10
+ USER_AGENT = (
11
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
12
+ "AppleWebKit/605.1.15 (KHTML, like Gecko) Safari/605.1.15"
13
+ )
14
+
15
+ # Service limit per request.
16
+ MAX_STATIONS = 5
17
+ MAX_PARAMETERS = 5
18
+
19
+ # Station code -> (station name, body of water). The 9 official WGMN stations.
20
+ # The form's checkbox order (cblStationen:0..8) is not encoded here; it is read
21
+ # from the live form at runtime (see _station_index_map in wgmn.py).
22
+ STATIONS: dict[str, tuple[str, str]] = {
23
+ "BU": ("Bunthaus", "Elbe"),
24
+ "SH": ("Seemannshöft", "Elbe"),
25
+ "BL": ("Blankenese", "Elbe"),
26
+ "LB": ("Lombardsbrücke", "Alster"),
27
+ "HA": ("Haselknick", "Alster"),
28
+ "TA": ("Rosenbrook", "Tarpenbek"),
29
+ "BK": ("Brügkamp", "Ammersbek"),
30
+ "FH": ("Fischerhof", "Bille"),
31
+ "WA": ("Wandsbeker Allee", "Wandse"),
32
+ }
33
+
34
+ # WGS84 (latitude, longitude) per station code. Static reference data: the 9
35
+ # WGMN stations are fixed, so coordinates live here rather than in the DB schema.
36
+ # Order is (lat, lon) for human sanity; GeoJSON's [lon, lat] is the API's job.
37
+ # Every STATIONS code needs an entry here (checked at import time by catalogue._check_coords()).
38
+ STATION_COORDS: dict[str, tuple[float, float]] = {
39
+ "BU": (53.46166, 10.06434), # Bunthaus (Elbe)
40
+ "SH": (53.54024, 9.87984), # Seemannshöft (Elbe)
41
+ "BL": (53.55587, 9.80545), # Blankenese (Elbe)
42
+ "LB": (53.55725, 9.99797), # Lombardsbrücke (Alster)
43
+ "HA": (53.69548, 10.12075), # Haselknick (Alster)
44
+ "TA": (53.60223, 9.98624), # Rosenbrook (Tarpenbek)
45
+ "BK": (53.71191, 10.16285), # Brügkamp (Ammersbek)
46
+ "FH": (53.48927, 10.21090), # Fischerhof (Bille)
47
+ "WA": (53.57616, 10.06895), # Wandsbeker Allee (Wandse)
48
+ }
49
+
50
+ # Order of the parameter checkboxes (clbMesswerte:0..13) in the form.
51
+ PARAMETERS = [
52
+ "Lufttemperatur",
53
+ "Wassertemperatur",
54
+ "Sauerstoffkonzentration",
55
+ "Sauerstoffsättigung",
56
+ "pH-Wert",
57
+ "Leitfähigkeit Kappa 25",
58
+ "Trübung",
59
+ "Gesamtchlorophyll",
60
+ "Chlorophyll Blaualgen",
61
+ "Chlorophyll Grünalgen",
62
+ "Chlorophyll Kieselalgen",
63
+ "Chlorophyll Cryptophyceen",
64
+ "UV-Absorption",
65
+ "AlarmIndex",
66
+ ]
67
+ # Core parameters (water temp, O2 conc., O2 sat., pH, conductivity, turbidity).
68
+ DEFAULT_PARAMETERS = [1, 2, 3, 4, 5, 6]
69
+
70
+ # Collect-mode defaults.
71
+ DEFAULT_INTERVAL = 600 # seconds between polls (source updates ~every 10 min)
72
+ MIN_INTERVAL = 30 # floor enforced by --interval parsing
73
+ DEFAULT_DB_PATH = "fluvilog.db" # SQLite file, relative to cwd
74
+
75
+ # Timezone of stored reading timestamps; also the offset all read APIs emit.
76
+ BERLIN_TZ = "Europe/Berlin"
77
+
78
+ # HTTP API tier (optional [api] extra; see fluvilog.api).
79
+ MAX_WINDOW_DAYS = 30 # largest /api/readings window before a 422
80
+ DEFAULT_API_HOST = "127.0.0.1"
81
+ DEFAULT_API_PORT = 8000
82
+
83
+ # Schema object names (DDL itself lives in storage.py).
84
+ TABLE_READINGS = "readings" # fact table
85
+ TABLE_STATIONS = "stations" # station dimension
86
+ TABLE_PARAMETERS = "parameters" # parameter dimension
87
+ VIEW_READINGS_FULL = "readings_full" # denormalized join of the three
fluvilog/records.py ADDED
@@ -0,0 +1,39 @@
1
+ """Internal read records: the contract between storage/catalogue and callers.
2
+
3
+ Plain dataclasses, deliberately not Pydantic, so the persistence and read
4
+ layers carry no dependency on the optional API tier. See fluvilog.api.schemas
5
+ for the public HTTP shapes.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from datetime import datetime
10
+
11
+
12
@dataclass(frozen=True, slots=True)
class StationRecord:
    """A WGMN station with its WGS84 position.

    Immutable (frozen) record. latitude/longitude are decimal degrees
    (EPSG:4326).
    """

    code: str  # station code, e.g. "BL"
    name: str  # station name, e.g. "Blankenese"
    water_body: str  # body of water, e.g. "Elbe"
    latitude: float
    longitude: float
24
+
25
+
26
@dataclass(frozen=True, slots=True)
class ReadingRecord:
    """A single stored reading.

    Immutable (frozen) record. parameter is the display name (e.g.
    "Wassertemperatur"). timestamp is tz-aware Europe/Berlin. value is None
    when the sensor reported no value; None is preserved, not dropped.
    """

    station_code: str
    parameter: str  # display name, not an index
    unit: str
    timestamp: datetime  # tz-aware, Europe/Berlin
    value: float | None  # None = sensor reported no value
fluvilog/service.py ADDED
@@ -0,0 +1,83 @@
1
+ """Continuous polling loop that persists WGMN readings to a Storage backend."""
2
+
3
+ import datetime as dt
4
+ import logging
5
+ import signal
6
+ import sqlite3
7
+ import threading
8
+ import time
9
+
10
+ import requests
11
+
12
+ from .constants import MIN_INTERVAL
13
+ from .storage import Storage
14
+ from .wgmn import fetch_history
15
+
16
+ _UNITS = {"s": 1, "m": 60, "h": 3600}
17
+
18
+
19
def parse_interval(text: str) -> float:
    """Parse '600', '30s', '10m', or '1h' to seconds.

    A bare number is seconds; a trailing s/m/h scales accordingly. Surrounding
    whitespace and letter case are ignored.

    Returns:
        The interval in seconds, always finite and at least MIN_INTERVAL.

    Raises:
        ValueError: on malformed input, on a non-finite number ('nan', 'inf'),
            or when the result is below MIN_INTERVAL.
    """
    import math

    text = text.strip().lower()
    if text and text[-1] in _UNITS:
        seconds = float(text[:-1]) * _UNITS[text[-1]]
    else:
        seconds = float(text)
    # float() accepts 'nan' and 'inf'. NaN compares False against everything,
    # so it would slip past the minimum check below; reject both explicitly.
    if not math.isfinite(seconds):
        raise ValueError("interval must be a finite number of seconds")
    if seconds < MIN_INTERVAL:
        raise ValueError(f"interval must be >= {MIN_INTERVAL}s")
    return seconds
34
+
35
+
36
def collect(
    station_codes: list[str],
    parameter_idx: list[int],
    storage: Storage,
    interval: float,
    *,
    log: logging.Logger | None = None,
) -> int:
    """Poll the selected stations/parameters every `interval` seconds.

    Each poll calls fetch_history and hands the result to storage.write
    together with the UTC time the poll started. Network and storage errors in
    one iteration are logged and skipped; the loop continues. The interval is
    measured from each iteration's start, so a slow fetch shortens the
    following wait rather than accumulating drift.

    SIGINT and SIGTERM handlers are installed for the duration of the loop and
    the previous handlers are put back before returning.

    Args:
        station_codes: Station codes to poll.
        parameter_idx: Parameter indices to poll.
        storage: Backend that persists each fetched frame.
        interval: Seconds between the starts of consecutive polls.
        log: Logger to use; defaults to this module's logger.

    Returns:
        0 once SIGINT or SIGTERM has been received.
    """
    log = log or logging.getLogger(__name__)
    stop = threading.Event()

    def _handle(signum: int, frame: object) -> None:
        log.info("received %s, shutting down", signal.Signals(signum).name)
        stop.set()

    # Remember what was installed before so the caller's signal handling is
    # not permanently replaced by a handler bound to this call's event.
    previous = {}
    for sig in (signal.SIGINT, signal.SIGTERM):
        previous[sig] = signal.signal(sig, _handle)

    try:
        log.info(
            "collecting %d station(s) × %d parameter(s), every %.0fs",
            len(station_codes),
            len(parameter_idx),
            interval,
        )
        while not stop.is_set():
            started = time.monotonic()
            fetched_at = dt.datetime.now(dt.UTC)
            try:
                df = fetch_history(station_codes, parameter_idx)
                inserted = storage.write(df, fetched_at)
                log.info("inserted %d new row(s)", inserted)
            except requests.RequestException as e:
                log.warning("fetch failed, skipping iteration: %s", e)
            except sqlite3.Error as e:
                log.warning("storage error, skipping iteration: %s", e)
            if stop.is_set():
                # A signal arrived during the poll; don't announce another one.
                break
            delay = max(0.0, interval - (time.monotonic() - started))
            log.info("next poll in %.0fs", delay)
            if stop.wait(timeout=delay):
                break
    finally:
        for sig, handler in previous.items():
            # signal.signal() returns None for a handler that was not
            # installed from Python; that value cannot be passed back in.
            if handler is not None:
                signal.signal(sig, handler)
    return 0