fluvilog 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fluvilog/__init__.py +5 -0
- fluvilog/__main__.py +6 -0
- fluvilog/api/__init__.py +9 -0
- fluvilog/api/app.py +107 -0
- fluvilog/api/schemas.py +29 -0
- fluvilog/catalogue.py +34 -0
- fluvilog/cli.py +214 -0
- fluvilog/constants.py +87 -0
- fluvilog/records.py +39 -0
- fluvilog/service.py +83 -0
- fluvilog/storage.py +359 -0
- fluvilog/wgmn.py +237 -0
- fluvilog-0.1.0.dist-info/METADATA +14 -0
- fluvilog-0.1.0.dist-info/RECORD +18 -0
- fluvilog-0.1.0.dist-info/WHEEL +5 -0
- fluvilog-0.1.0.dist-info/entry_points.txt +2 -0
- fluvilog-0.1.0.dist-info/licenses/LICENSE +661 -0
- fluvilog-0.1.0.dist-info/top_level.txt +1 -0
fluvilog/__init__.py
ADDED
fluvilog/__main__.py
ADDED
fluvilog/api/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Optional HTTP read API (requires the [api] extra: FastAPI + uvicorn).
|
|
2
|
+
|
|
3
|
+
Importing this package pulls in FastAPI; the base CLI keeps it out of the import
|
|
4
|
+
graph and only loads it inside the serve-api handler.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .app import create_app
|
|
8
|
+
|
|
9
|
+
__all__ = ["create_app"]
|
fluvilog/api/app.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""FastAPI application factory for the optional HTTP read API.
|
|
2
|
+
|
|
3
|
+
Three GET endpoints: the station catalogue plus two over stored readings. Build with
|
|
4
|
+
create_app; the `fluvilog serve-api` subcommand runs it under uvicorn. Each
|
|
5
|
+
request gets its own read-only Storage, so reads never touch the poller's writer
|
|
6
|
+
connection or its schema.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
# Route handlers are registered by the @app.get decorators' side effect, which
|
|
10
|
+
# the type checker can't see; without this it flags them as unused.
|
|
11
|
+
# pyright: reportUnusedFunction=false
|
|
12
|
+
|
|
13
|
+
from collections.abc import Iterator
|
|
14
|
+
from dataclasses import asdict
|
|
15
|
+
from datetime import datetime, timedelta
|
|
16
|
+
from typing import Annotated
|
|
17
|
+
from zoneinfo import ZoneInfo
|
|
18
|
+
|
|
19
|
+
from fastapi import Depends, FastAPI, HTTPException, Query
|
|
20
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
21
|
+
|
|
22
|
+
from .. import catalogue
|
|
23
|
+
from ..constants import BERLIN_TZ, MAX_WINDOW_DAYS, PARAMETERS, STATIONS
|
|
24
|
+
from ..storage import SqliteStorage
|
|
25
|
+
from .schemas import ReadingOut, StationOut
|
|
26
|
+
|
|
27
|
+
_BERLIN = ZoneInfo(BERLIN_TZ)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _as_berlin(value: datetime) -> datetime:
    """Return *value* as a tz-aware datetime in Europe/Berlin.

    A naive datetime is interpreted as Berlin wall-clock time and merely gets
    the zone attached; an aware datetime is converted into the Berlin zone.
    """
    is_naive = value.tzinfo is None
    if not is_naive:
        return value.astimezone(_BERLIN)
    return value.replace(tzinfo=_BERLIN)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _validate_filters(station: list[str] | None, parameter: list[str] | None) -> None:
    """Raise HTTP 422 when a filter names an unknown station or parameter.

    None and empty lists mean "no filter" and always pass. Station codes are
    checked before parameter names, so when both are wrong the station error
    is the one reported.
    """
    bad_codes = [code for code in (station or ()) if code not in STATIONS]
    if bad_codes:
        raise HTTPException(422, f"unknown station code(s): {bad_codes}")

    bad_names = [name for name in (parameter or ()) if name not in PARAMETERS]
    if bad_names:
        raise HTTPException(422, f"unknown parameter name(s): {bad_names}")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _validate_window(start: datetime, end: datetime) -> None:
    """Reject reversed or over-wide windows with HTTP 422.

    The ordering check compares real instants. When both bounds are tz-aware
    they are normalised to UTC first, because Python compares two aware
    datetimes that share the same tzinfo object by wall-clock time only,
    ignoring offset and fold. Without this, a correctly ordered pair inside
    the autumn DST fold (e.g. 02:30+02:00 followed by 02:10+01:00) would be
    rejected as reversed. Naive pairs are compared as given.

    The width check is unchanged: it measures ``end - start`` exactly as the
    datetimes are passed in.

    Raises:
        HTTPException: 422 if start is after end, or if the window is wider
            than MAX_WINDOW_DAYS days.
    """
    # Function-local import: the module header only imports datetime/timedelta.
    from datetime import timezone

    both_aware = start.utcoffset() is not None and end.utcoffset() is not None
    if both_aware:
        is_reversed = start.astimezone(timezone.utc) > end.astimezone(timezone.utc)
    else:
        is_reversed = start > end
    if is_reversed:
        raise HTTPException(422, "'from' must not be after 'to'")
    if end - start > timedelta(days=MAX_WINDOW_DAYS):
        raise HTTPException(422, f"window exceeds the {MAX_WINDOW_DAYS}-day limit")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def create_app(*, db_path: str, allowed_origins: list[str]) -> FastAPI:
    """Assemble the read-only HTTP API for the SQLite database at db_path.

    CORS is restricted to GET requests from allowed_origins (all request
    headers allowed); an empty list permits no cross-origin request. The
    database is not opened here: the readings routes obtain a storage from
    SqliteStorage.open_readonly(db_path) per request and close it afterwards.
    """
    api = FastAPI(title="fluvilog API", version="0.1.0")
    api.add_middleware(
        CORSMiddleware,
        allow_headers=["*"],
        allow_methods=["GET"],
        allow_origins=allowed_origins,
    )

    def get_storage() -> Iterator[SqliteStorage]:
        # Generator dependency: yields a read-only storage and closes it in
        # `finally`.
        readonly = SqliteStorage.open_readonly(db_path)
        try:
            yield readonly
        finally:
            readonly.close()

    # NOTE: route handlers carry no docstrings on purpose, so this rewrite
    # does not change what FastAPI derives from them.

    @api.get("/api/stations")
    def get_stations() -> list[StationOut]:
        # Served from the static catalogue; no storage dependency.
        return [StationOut(**asdict(record)) for record in catalogue.stations()]

    @api.get("/api/readings/latest")
    def get_latest(
        store: Annotated[SqliteStorage, Depends(get_storage)],
        station: Annotated[list[str] | None, Query()] = None,
        parameter: Annotated[list[str] | None, Query()] = None,
    ) -> list[ReadingOut]:
        _validate_filters(station, parameter)
        newest = store.latest_readings(station_codes=station, parameters=parameter)
        return [ReadingOut(**asdict(record)) for record in newest]

    @api.get("/api/readings")
    def get_readings(
        store: Annotated[SqliteStorage, Depends(get_storage)],
        start: Annotated[datetime, Query(alias="from")],
        end: Annotated[datetime | None, Query(alias="to")] = None,
        station: Annotated[list[str] | None, Query()] = None,
        parameter: Annotated[list[str] | None, Query()] = None,
    ) -> list[ReadingOut]:
        _validate_filters(station, parameter)
        window_start = _as_berlin(start)
        # A missing 'to' means "up to now" (Berlin time).
        if end is None:
            window_end = datetime.now(_BERLIN)
        else:
            window_end = _as_berlin(end)
        _validate_window(window_start, window_end)
        found = store.readings_in_window(
            window_start, window_end, station_codes=station, parameters=parameter
        )
        return [ReadingOut(**asdict(record)) for record in found]

    return api
|
fluvilog/api/schemas.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Public, validated HTTP response models.
|
|
2
|
+
|
|
3
|
+
Pydantic shapes for the wire contract, kept separate from records.py so the HTTP
|
|
4
|
+
surface can evolve independently of the internal storage records.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StationOut(BaseModel):
    """A station in the catalogue. latitude/longitude are WGS84 degrees."""

    # Field names match StationRecord in records.py; the API builds this model
    # from asdict(record), so the two must stay in step.
    code: str
    name: str
    water_body: str
    latitude: float  # WGS84 degrees
    longitude: float  # WGS84 degrees
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ReadingOut(BaseModel):
    """A single reading. timestamp serialises as ISO 8601 with offset."""

    # Field names match ReadingRecord in records.py; the API builds this model
    # from asdict(record), so the two must stay in step.
    station_code: str
    parameter: str
    unit: str
    timestamp: datetime
    value: float | None  # None is a valid value and is passed through
|
fluvilog/catalogue.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Station reference data: the catalogue joining names with coordinates.
|
|
2
|
+
|
|
3
|
+
Source of truth is constants (STATIONS + STATION_COORDS), not the DB stations
|
|
4
|
+
table — that table exists only for the readings foreign key.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from .constants import STATION_COORDS, STATIONS
|
|
8
|
+
from .records import StationRecord
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _check_coords() -> None:
    """Fail fast when a station in STATIONS has no STATION_COORDS entry.

    Raises:
        ValueError: listing, in sorted order, every code without coordinates.
    """
    without_coords = sorted(code for code in STATIONS if code not in STATION_COORDS)
    if not without_coords:
        return
    raise ValueError(
        f"STATION_COORDS is missing coordinates for: {without_coords}"
    )
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
_check_coords()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def stations() -> list[StationRecord]:
    """Build the station catalogue, ordered by station code.

    Names and water bodies come from STATIONS, positions (WGS84 latitude,
    longitude) from STATION_COORDS.
    """
    records: list[StationRecord] = []
    for code in sorted(STATIONS):
        name, water_body = STATIONS[code]
        position = STATION_COORDS[code]
        records.append(
            StationRecord(
                code=code,
                name=name,
                water_body=water_body,
                latitude=position[0],
                longitude=position[1],
            )
        )
    return records
|
fluvilog/cli.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Near-real-time readings from Hamburg's water quality network (WGMN).
|
|
2
|
+
|
|
3
|
+
Polls the public HamburgService platform (Wassergüte-Auskunft) at
|
|
4
|
+
serviceportal.hamburg.de and, by default, stores readings continuously.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
fluvilog # collect: poll and store (default subcommand)
|
|
8
|
+
fluvilog collect --station BL SH # collect only specific stations
|
|
9
|
+
fluvilog collect --db water.db --interval 10m
|
|
10
|
+
fluvilog once # one-shot fetch and print
|
|
11
|
+
fluvilog once --csv values.csv # ... and write to CSV
|
|
12
|
+
fluvilog list # list known stations
|
|
13
|
+
fluvilog serve-api # serve the HTTP read API (needs [api] extra)
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import logging
|
|
18
|
+
import sqlite3
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
import pandas as pd
|
|
22
|
+
import requests
|
|
23
|
+
|
|
24
|
+
from .constants import (
|
|
25
|
+
DEFAULT_API_HOST,
|
|
26
|
+
DEFAULT_API_PORT,
|
|
27
|
+
DEFAULT_DB_PATH,
|
|
28
|
+
DEFAULT_INTERVAL,
|
|
29
|
+
DEFAULT_PARAMETERS,
|
|
30
|
+
STATIONS,
|
|
31
|
+
)
|
|
32
|
+
from .service import collect, parse_interval
|
|
33
|
+
from .storage import IncompatibleSchemaError, SqliteStorage
|
|
34
|
+
from .wgmn import fetch
|
|
35
|
+
|
|
36
|
+
_COMMANDS = {"collect", "once", "list", "serve-api"}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def resolve_codes(selectors: list[str] | None) -> list[str]:
|
|
40
|
+
"""Translate --station arguments (code or name, case-insensitive) to codes."""
|
|
41
|
+
if not selectors:
|
|
42
|
+
return list(STATIONS)
|
|
43
|
+
by_name = {name.casefold(): code for code, (name, _) in STATIONS.items()}
|
|
44
|
+
codes: list[str] = []
|
|
45
|
+
for sel in selectors:
|
|
46
|
+
if sel.upper() in STATIONS:
|
|
47
|
+
codes.append(sel.upper())
|
|
48
|
+
elif sel.casefold() in by_name:
|
|
49
|
+
codes.append(by_name[sel.casefold()])
|
|
50
|
+
else:
|
|
51
|
+
print(f" ! unknown station: {sel!r} (list shows all)", file=sys.stderr)
|
|
52
|
+
return codes
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _run_list() -> int:
    """Write the known stations to stdout, one per line, and return 0."""
    lines = ["# Known WGMN stations:"]
    for code, (name, water_body) in STATIONS.items():
        lines.append(f" {code} {name} ({water_body})")
    print("\n".join(lines))
    return 0
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _run_once(args: argparse.Namespace) -> int:
    """Fetch the latest values once, print them, and optionally write CSV.

    Args:
        args: Parsed `once` arguments; reads ``args.station`` and ``args.csv``.

    Returns:
        0 on success; 1 on a network/HTTP error, when no measurements came
        back, or when the CSV file cannot be written; 2 when no --station
        selector resolved to a known station.
    """
    codes = resolve_codes(args.station)
    if not codes:
        return 2

    try:
        df = fetch(codes, DEFAULT_PARAMETERS)
    except requests.RequestException as e:
        print(f"Network/HTTP error: {e}", file=sys.stderr)
        return 1

    if df.empty:
        print("No measurements available.", file=sys.stderr)
        return 1

    pd.set_option("display.max_rows", None)
    pd.set_option("display.width", 200)
    print(df.to_string(index=False))
    latest = df["timestamp"].max()
    print(
        f"\n{len(df)} measurement(s) from {df['station'].nunique()} station(s), "
        f"latest: {latest:%d.%m.%Y %H:%M}."
    )

    if args.csv:
        # An unwritable path (missing directory, no permission, ...) is a user
        # error like the ones above: report it and exit non-zero instead of
        # letting a traceback escape.
        try:
            df.to_csv(args.csv, index=False)
        except OSError as e:
            print(f"Could not write CSV {args.csv!r}: {e}", file=sys.stderr)
            return 1
        print(f"Saved: {args.csv}")
    return 0
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _run_collect(args: argparse.Namespace) -> int:
    """Poll and store continuously until the collector stops.

    Returns 2 when no --station selector resolved, 1 when the database is
    incompatible or raises a SQLite error, otherwise whatever collect returns.
    """
    selected = resolve_codes(args.station)
    if not selected:
        return 2

    try:
        with SqliteStorage(args.db) as store:
            return collect(selected, DEFAULT_PARAMETERS, store, args.interval)
    except (IncompatibleSchemaError, sqlite3.Error) as exc:
        # A schema mismatch already carries a complete message; any other
        # SQLite failure gets a prefix.
        if isinstance(exc, IncompatibleSchemaError):
            message = str(exc)
        else:
            message = f"Database error: {exc}"
        print(message, file=sys.stderr)
        return 1
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _run_serve_api(args: argparse.Namespace) -> int:
    """Run the HTTP read API under uvicorn until the server exits.

    The web stack is imported here rather than at module level, so the other
    subcommands work without the optional [api] extra. Returns 1 with an
    install hint when that import fails, else 0 after uvicorn returns.
    """
    try:
        import uvicorn

        from .api import create_app
    except ImportError:
        hint = (
            "The HTTP API needs the optional dependencies. "
            "Install them with: pip install 'fluvilog[api]'"
        )
        print(hint, file=sys.stderr)
        return 1

    uvicorn.run(
        create_app(db_path=args.db, allowed_origins=args.cors_origin),
        host=args.host,
        port=args.port,
    )
    return 0
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def main(argv: list[str] | None = None) -> int:
    """Entry point. Bare invocation (no subcommand) runs `collect`.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        The exit status of the selected subcommand. Invalid command lines are
        handled by argparse, which exits the process itself.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        stream=sys.stderr,
    )
    argv = sys.argv[1:] if argv is None else list(argv)
    # Anything that is not a known subcommand (or a help flag) is treated as
    # options for the default subcommand.
    if not argv or (argv[0] not in _COMMANDS and argv[0] not in {"-h", "--help"}):
        argv = ["collect", *argv]

    def _interval(text: str) -> float:
        # argparse replaces a plain ValueError with a generic "invalid ...
        # value" message; ArgumentTypeError lets the real reason (e.g. the
        # minimum interval) reach the user.
        try:
            return parse_interval(text)
        except ValueError as e:
            raise argparse.ArgumentTypeError(f"invalid interval {text!r}: {e}") from e

    def _add_station_option(parser: argparse.ArgumentParser) -> None:
        # Shared by `collect` and `once`.
        parser.add_argument(
            "--station", nargs="+", metavar="CODE/NAME", help="Only these stations"
        )

    def _add_db_option(parser: argparse.ArgumentParser) -> None:
        # Shared by `collect` and `serve-api`.
        parser.add_argument(
            "--db",
            metavar="PATH",
            default=DEFAULT_DB_PATH,
            help=f"SQLite database path (default: {DEFAULT_DB_PATH})",
        )

    ap = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    sub = ap.add_subparsers(dest="command", required=True)

    p_collect = sub.add_parser("collect", help="Continuously fetch and store (default)")
    _add_station_option(p_collect)
    _add_db_option(p_collect)
    p_collect.add_argument(
        "--interval",
        type=_interval,
        default=float(DEFAULT_INTERVAL),
        metavar="DURATION",
        help=f"Poll interval, e.g. 30s/10m/1h (default: {DEFAULT_INTERVAL}s)",
    )

    p_once = sub.add_parser("once", help="One-shot fetch and print")
    _add_station_option(p_once)
    p_once.add_argument("--csv", metavar="PATH", help="Write result to CSV")

    sub.add_parser("list", help="List known stations and exit")

    p_api = sub.add_parser(
        "serve-api", help="Serve the HTTP read API (needs the [api] extra)"
    )
    _add_db_option(p_api)
    p_api.add_argument(
        "--host",
        metavar="HOST",
        default=DEFAULT_API_HOST,
        help=f"Bind host (default: {DEFAULT_API_HOST})",
    )
    p_api.add_argument(
        "--port",
        type=int,
        metavar="PORT",
        default=DEFAULT_API_PORT,
        help=f"Bind port (default: {DEFAULT_API_PORT})",
    )
    p_api.add_argument(
        "--cors-origin",
        action="append",
        default=[],
        metavar="ORIGIN",
        help="Allowed CORS origin; repeatable (default: none)",
    )

    args = ap.parse_args(argv)

    if args.command == "list":
        return _run_list()
    if args.command == "once":
        return _run_once(args)
    if args.command == "serve-api":
        return _run_serve_api(args)
    return _run_collect(args)
|
fluvilog/constants.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
"""Static configuration: endpoint, request limits, and the station/parameter
|
|
2
|
+
catalogues of the HamburgService water quality endpoint."""
|
|
3
|
+
|
|
4
|
+
# Service endpoint (ASP.NET WebForms "Wassergüte-Auskunft").
|
|
5
|
+
BASE = "https://serviceportal.hamburg.de/HamburgGateway"
|
|
6
|
+
START = f"{BASE}/Service/StartService/WGMN?linkId=0&ars=020000000000"
|
|
7
|
+
PFX = "GatewayMaster:ContentSection:wucStationenAuswahlListe1:"
|
|
8
|
+
ENCODING = "windows-1252"
|
|
9
|
+
TIMEOUT = 60
|
|
10
|
+
USER_AGENT = (
|
|
11
|
+
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
|
12
|
+
"AppleWebKit/605.1.15 (KHTML, like Gecko) Safari/605.1.15"
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Service limit per request.
|
|
16
|
+
MAX_STATIONS = 5
|
|
17
|
+
MAX_PARAMETERS = 5
|
|
18
|
+
|
|
19
|
+
# Station code -> (station name, body of water). The 9 official WGMN stations.
|
|
20
|
+
# The form's checkbox order (cblStationen:0..8) is not encoded here; it is read
|
|
21
|
+
# from the live form at runtime (see _station_index_map in wgmn.py).
|
|
22
|
+
STATIONS: dict[str, tuple[str, str]] = {
|
|
23
|
+
"BU": ("Bunthaus", "Elbe"),
|
|
24
|
+
"SH": ("Seemannshöft", "Elbe"),
|
|
25
|
+
"BL": ("Blankenese", "Elbe"),
|
|
26
|
+
"LB": ("Lombardsbrücke", "Alster"),
|
|
27
|
+
"HA": ("Haselknick", "Alster"),
|
|
28
|
+
"TA": ("Rosenbrook", "Tarpenbek"),
|
|
29
|
+
"BK": ("Brügkamp", "Ammersbek"),
|
|
30
|
+
"FH": ("Fischerhof", "Bille"),
|
|
31
|
+
"WA": ("Wandsbeker Allee", "Wandse"),
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
# WGS84 (latitude, longitude) per station code. Static reference data: the 9
|
|
35
|
+
# WGMN stations are fixed, so coordinates live here rather than in the DB schema.
|
|
36
|
+
# Order is (lat, lon) for human sanity; GeoJSON's [lon, lat] is the API's job.
|
|
37
|
+
# Every STATIONS key needs an entry here (checked at import time by catalogue._check_coords()).
|
|
38
|
+
STATION_COORDS: dict[str, tuple[float, float]] = {
|
|
39
|
+
"BU": (53.46166, 10.06434), # Bunthaus (Elbe)
|
|
40
|
+
"SH": (53.54024, 9.87984), # Seemannshöft (Elbe)
|
|
41
|
+
"BL": (53.55587, 9.80545), # Blankenese (Elbe)
|
|
42
|
+
"LB": (53.55725, 9.99797), # Lombardsbrücke (Alster)
|
|
43
|
+
"HA": (53.69548, 10.12075), # Haselknick (Alster)
|
|
44
|
+
"TA": (53.60223, 9.98624), # Rosenbrook (Tarpenbek)
|
|
45
|
+
"BK": (53.71191, 10.16285), # Brügkamp (Ammersbek)
|
|
46
|
+
"FH": (53.48927, 10.21090), # Fischerhof (Bille)
|
|
47
|
+
"WA": (53.57616, 10.06895), # Wandsbeker Allee (Wandse)
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
# Order of the parameter checkboxes (clbMesswerte:0..13) in the form.
|
|
51
|
+
PARAMETERS = [
|
|
52
|
+
"Lufttemperatur",
|
|
53
|
+
"Wassertemperatur",
|
|
54
|
+
"Sauerstoffkonzentration",
|
|
55
|
+
"Sauerstoffsättigung",
|
|
56
|
+
"pH-Wert",
|
|
57
|
+
"Leitfähigkeit Kappa 25",
|
|
58
|
+
"Trübung",
|
|
59
|
+
"Gesamtchlorophyll",
|
|
60
|
+
"Chlorophyll Blaualgen",
|
|
61
|
+
"Chlorophyll Grünalgen",
|
|
62
|
+
"Chlorophyll Kieselalgen",
|
|
63
|
+
"Chlorophyll Cryptophyceen",
|
|
64
|
+
"UV-Absorption",
|
|
65
|
+
"AlarmIndex",
|
|
66
|
+
]
|
|
67
|
+
# Core parameters (water temp, O2 conc., O2 sat., pH, conductivity, turbidity).
|
|
68
|
+
DEFAULT_PARAMETERS = [1, 2, 3, 4, 5, 6]
|
|
69
|
+
|
|
70
|
+
# Collect-mode defaults.
|
|
71
|
+
DEFAULT_INTERVAL = 600 # seconds between polls (source updates ~every 10 min)
|
|
72
|
+
MIN_INTERVAL = 30 # floor enforced by --interval parsing
|
|
73
|
+
DEFAULT_DB_PATH = "fluvilog.db" # SQLite file, relative to cwd
|
|
74
|
+
|
|
75
|
+
# Timezone of stored reading timestamps; also the offset all read APIs emit.
|
|
76
|
+
BERLIN_TZ = "Europe/Berlin"
|
|
77
|
+
|
|
78
|
+
# HTTP API tier (optional [api] extra; see fluvilog.api).
|
|
79
|
+
MAX_WINDOW_DAYS = 30 # largest /api/readings window before a 422
|
|
80
|
+
DEFAULT_API_HOST = "127.0.0.1"
|
|
81
|
+
DEFAULT_API_PORT = 8000
|
|
82
|
+
|
|
83
|
+
# Schema object names (DDL itself lives in storage.py).
|
|
84
|
+
TABLE_READINGS = "readings" # fact table
|
|
85
|
+
TABLE_STATIONS = "stations" # station dimension
|
|
86
|
+
TABLE_PARAMETERS = "parameters" # parameter dimension
|
|
87
|
+
VIEW_READINGS_FULL = "readings_full" # denormalized join of the three
|
fluvilog/records.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Internal read records: the contract between storage/catalogue and callers.
|
|
2
|
+
|
|
3
|
+
Plain dataclasses, deliberately not Pydantic, so the persistence and read
|
|
4
|
+
layers carry no dependency on the optional API tier. See fluvilog.api.schemas
|
|
5
|
+
for the public HTTP shapes.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True, slots=True)
|
|
13
|
+
class StationRecord:
|
|
14
|
+
"""A WGMN station with its WGS84 position.
|
|
15
|
+
|
|
16
|
+
latitude/longitude are decimal degrees (EPSG:4326).
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
code: str
|
|
20
|
+
name: str
|
|
21
|
+
water_body: str
|
|
22
|
+
latitude: float
|
|
23
|
+
longitude: float
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True, slots=True)
|
|
27
|
+
class ReadingRecord:
|
|
28
|
+
"""A single stored reading.
|
|
29
|
+
|
|
30
|
+
parameter is the display name (e.g. "Wassertemperatur"). timestamp is
|
|
31
|
+
tz-aware Europe/Berlin. value is None when the sensor reported no value;
|
|
32
|
+
None is preserved, not dropped.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
station_code: str
|
|
36
|
+
parameter: str
|
|
37
|
+
unit: str
|
|
38
|
+
timestamp: datetime
|
|
39
|
+
value: float | None
|
fluvilog/service.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""Continuous polling loop that persists WGMN readings to a Storage backend."""
|
|
2
|
+
|
|
3
|
+
import datetime as dt
|
|
4
|
+
import logging
|
|
5
|
+
import signal
|
|
6
|
+
import sqlite3
|
|
7
|
+
import threading
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
import requests
|
|
11
|
+
|
|
12
|
+
from .constants import MIN_INTERVAL
|
|
13
|
+
from .storage import Storage
|
|
14
|
+
from .wgmn import fetch_history
|
|
15
|
+
|
|
16
|
+
_UNITS = {"s": 1, "m": 60, "h": 3600}
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def parse_interval(text: str) -> float:
    """Parse '600', '30s', '10m', or '1h' to seconds.

    A bare number is seconds; a trailing s/m/h scales accordingly. Surrounding
    whitespace and letter case are ignored.

    Args:
        text: The duration as typed by the user.

    Returns:
        The interval in seconds, always finite and at least MIN_INTERVAL.

    Raises:
        ValueError: If the text is not a number with an optional s/m/h
            suffix, if it is not finite ('nan', 'inf'), or if it is smaller
            than MIN_INTERVAL.
    """
    # Function-local import: the module header does not import math.
    import math

    text = text.strip().lower()
    if text and text[-1] in _UNITS:
        seconds = float(text[:-1]) * _UNITS[text[-1]]
    else:
        seconds = float(text)
    # float() accepts 'nan' and 'inf'; neither compares below MIN_INTERVAL,
    # so without this check they would be returned as a poll interval.
    if not math.isfinite(seconds):
        raise ValueError("interval must be a finite number")
    if seconds < MIN_INTERVAL:
        raise ValueError(f"interval must be >= {MIN_INTERVAL}s")
    return seconds
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def collect(
    station_codes: list[str],
    parameter_idx: list[int],
    storage: Storage,
    interval: float,
    *,
    log: logging.Logger | None = None,
) -> int:
    """Poll the selected stations/parameters every `interval` seconds.

    Each poll fetches the full window and persists it idempotently. Network and
    storage errors in one iteration are logged and skipped; the loop continues.
    The interval is measured from each iteration's start, so a slow fetch
    shortens the following wait rather than accumulating drift. Runs until
    SIGINT or SIGTERM, then returns 0.

    SIGINT/SIGTERM handlers are installed for the duration of the call and the
    previous handlers are put back before returning, so a caller that keeps
    running afterwards gets its normal signal behaviour again.

    Args:
        station_codes: Station codes to poll.
        parameter_idx: Parameter indices to poll.
        storage: Backend whose ``write(df, fetched_at)`` persists each poll.
        interval: Seconds between the starts of consecutive polls.
        log: Logger to use; defaults to this module's logger.

    Returns:
        0 once a stop signal has been received.
    """
    log = log or logging.getLogger(__name__)
    stop = threading.Event()

    def _handle(signum: int, frame: object) -> None:
        log.info("received %s, shutting down", signal.Signals(signum).name)
        stop.set()

    # signal.signal returns the handler it replaces; remember it for restore.
    previous = [
        (sig, signal.signal(sig, _handle)) for sig in (signal.SIGINT, signal.SIGTERM)
    ]
    try:
        log.info(
            "collecting %d station(s) × %d parameter(s), every %.0fs",
            len(station_codes),
            len(parameter_idx),
            interval,
        )
        while not stop.is_set():
            started = time.monotonic()
            fetched_at = dt.datetime.now(dt.UTC)
            try:
                df = fetch_history(station_codes, parameter_idx)
                inserted = storage.write(df, fetched_at)
                log.info("inserted %d new row(s)", inserted)
            except requests.RequestException as e:
                log.warning("fetch failed, skipping iteration: %s", e)
            except sqlite3.Error as e:
                log.warning("storage error, skipping iteration: %s", e)
            # A signal that arrived during the fetch ends the loop here, so no
            # "next poll" is announced that will never happen.
            if stop.is_set():
                break
            delay = max(0.0, interval - (time.monotonic() - started))
            log.info("next poll in %.0fs", delay)
            if stop.wait(timeout=delay):
                break
    finally:
        for sig, handler in previous:
            # None means the old handler was not installed from Python and
            # cannot be re-installed through signal.signal.
            if handler is not None:
                signal.signal(sig, handler)
    return 0
|