austrata 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- austrata-0.1.0/LICENSE +21 -0
- austrata-0.1.0/PKG-INFO +189 -0
- austrata-0.1.0/README.md +146 -0
- austrata-0.1.0/austrata/__init__.py +43 -0
- austrata-0.1.0/austrata/application/__init__.py +1 -0
- austrata-0.1.0/austrata/application/fetch_boreholes.py +119 -0
- austrata-0.1.0/austrata/application/fetch_hydrogeology.py +93 -0
- austrata-0.1.0/austrata/application/fetch_ngis.py +175 -0
- austrata-0.1.0/austrata/client.py +178 -0
- austrata-0.1.0/austrata/domain/__init__.py +16 -0
- austrata-0.1.0/austrata/domain/borehole.py +421 -0
- austrata-0.1.0/austrata/domain/coercion.py +37 -0
- austrata-0.1.0/austrata/domain/construction.py +78 -0
- austrata-0.1.0/austrata/domain/hydrogeology.py +40 -0
- austrata-0.1.0/austrata/domain/region.py +145 -0
- austrata-0.1.0/austrata/domain/stratigraphy.py +167 -0
- austrata-0.1.0/austrata/groundwater_client.py +159 -0
- austrata-0.1.0/austrata/infrastructure/__init__.py +1 -0
- austrata-0.1.0/austrata/infrastructure/arcgis_rest_client.py +155 -0
- austrata-0.1.0/austrata/infrastructure/dataset_cache.py +359 -0
- austrata-0.1.0/austrata/infrastructure/feature_mapper.py +141 -0
- austrata-0.1.0/austrata/infrastructure/http.py +163 -0
- austrata-0.1.0/austrata/infrastructure/ngis_download.py +216 -0
- austrata-0.1.0/austrata/infrastructure/ngis_mapper.py +198 -0
- austrata-0.1.0/austrata/infrastructure/ngis_optimiser.py +191 -0
- austrata-0.1.0/austrata/infrastructure/ngis_sources.py +157 -0
- austrata-0.1.0/austrata/infrastructure/ogc_wfs_client.py +218 -0
- austrata-0.1.0/austrata/ngis_client.py +148 -0
- austrata-0.1.0/austrata/ports/__init__.py +1 -0
- austrata-0.1.0/austrata/ports/cache.py +64 -0
- austrata-0.1.0/austrata/ports/data_source.py +59 -0
- austrata-0.1.0/austrata/py.typed +0 -0
- austrata-0.1.0/austrata.egg-info/PKG-INFO +189 -0
- austrata-0.1.0/austrata.egg-info/SOURCES.txt +56 -0
- austrata-0.1.0/austrata.egg-info/dependency_links.txt +1 -0
- austrata-0.1.0/austrata.egg-info/requires.txt +21 -0
- austrata-0.1.0/austrata.egg-info/top_level.txt +1 -0
- austrata-0.1.0/pyproject.toml +96 -0
- austrata-0.1.0/setup.cfg +4 -0
- austrata-0.1.0/tests/test_arcgis_client.py +126 -0
- austrata-0.1.0/tests/test_arcgis_live.py +48 -0
- austrata-0.1.0/tests/test_borehole.py +222 -0
- austrata-0.1.0/tests/test_client.py +237 -0
- austrata-0.1.0/tests/test_client_live.py +53 -0
- austrata-0.1.0/tests/test_construction.py +58 -0
- austrata-0.1.0/tests/test_dataset_cache.py +239 -0
- austrata-0.1.0/tests/test_fetch_ngis.py +186 -0
- austrata-0.1.0/tests/test_ga_server_live.py +715 -0
- austrata-0.1.0/tests/test_groundwater_client.py +257 -0
- austrata-0.1.0/tests/test_http.py +79 -0
- austrata-0.1.0/tests/test_intervals.py +116 -0
- austrata-0.1.0/tests/test_ngis_client.py +280 -0
- austrata-0.1.0/tests/test_ngis_download.py +119 -0
- austrata-0.1.0/tests/test_ngis_live.py +156 -0
- austrata-0.1.0/tests/test_ngis_optimiser.py +171 -0
- austrata-0.1.0/tests/test_region.py +85 -0
- austrata-0.1.0/tests/test_wfs_client.py +106 -0
- austrata-0.1.0/tests/test_wfs_live.py +51 -0
austrata-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sia Ghelichkhan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
austrata-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: austrata
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Access Geoscience Australia borehole and hydrogeology data with a provenance-aware local cache
|
|
5
|
+
Author-email: Sia Ghelichkhan <siavash.ghelichkhan@anu.edu.au>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/g-adopt/austrata
|
|
8
|
+
Project-URL: Issues, https://github.com/g-adopt/austrata/issues
|
|
9
|
+
Keywords: geoscience australia,boreholes,hydrogeology,stratigraphy,geospatial
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Science/Research
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering
|
|
18
|
+
Classifier: Topic :: Scientific/Engineering :: GIS
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: geopandas>=0.14
|
|
23
|
+
Requires-Dist: shapely>=2.0
|
|
24
|
+
Requires-Dist: pyproj>=3.5
|
|
25
|
+
Requires-Dist: pyogrio>=0.7
|
|
26
|
+
Requires-Dist: fiona>=1.9
|
|
27
|
+
Requires-Dist: pyarrow>=14.0
|
|
28
|
+
Requires-Dist: requests>=2.28
|
|
29
|
+
Requires-Dist: tenacity>=8.2
|
|
30
|
+
Requires-Dist: platformdirs>=3.0
|
|
31
|
+
Requires-Dist: filelock>=3.12
|
|
32
|
+
Requires-Dist: tqdm>=4.64
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-cov>=3.0; extra == "dev"
|
|
36
|
+
Requires-Dist: vcrpy>=5.0; extra == "dev"
|
|
37
|
+
Requires-Dist: responses>=0.23; extra == "dev"
|
|
38
|
+
Requires-Dist: flake8>=5.0; extra == "dev"
|
|
39
|
+
Requires-Dist: black>=22.0; extra == "dev"
|
|
40
|
+
Requires-Dist: mypy>=0.990; extra == "dev"
|
|
41
|
+
Requires-Dist: types-requests; extra == "dev"
|
|
42
|
+
Dynamic: license-file
|
|
43
|
+
|
|
44
|
+
# austrata
|
|
45
|
+
|
|
46
|
+
[Tests](https://github.com/g-adopt/austrata/actions/workflows/tests.yml)
|
|
47
|
+
[PyPI](https://pypi.org/project/austrata/)
|
|
48
|
+
[Python versions](https://pypi.org/project/austrata/)
|
|
49
|
+
[License: MIT](LICENSE)
|
|
50
|
+
|
|
51
|
+
Access Geoscience Australia borehole and hydrogeology data through their open
|
|
52
|
+
OGC/ArcGIS web services, with a provenance-aware local cache.
|
|
53
|
+
|
|
54
|
+
`austrata` models boreholes as first-class objects (a header plus downhole
|
|
55
|
+
stratigraphy and earth-material logs), lets you pull every bore inside an
|
|
56
|
+
arbitrary polygon or bounding box, and exposes the Hydrogeology of Australia
|
|
57
|
+
polygon layer to overlay. It talks to two backends behind one API: the GA
|
|
58
|
+
boreholes GeoServer (WFS) and the Hydrogeology of Australia ArcGIS MapServer.
|
|
59
|
+
Results are cached locally as GeoParquet with a provenance manifest, and
|
|
60
|
+
revalidated before refetching so repeated queries are cheap and reproducible.
|
|
61
|
+
|
|
62
|
+
Everything is returned in lon/lat (EPSG:4283, GDA94 geographic). Map projection
|
|
63
|
+
and mesh generation are deliberately out of scope — those live in the companion
|
|
64
|
+
`omega` package, which consumes this one.
|
|
65
|
+
|
|
66
|
+
## Installation
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
pip install -e ".[dev]"
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Requires Python 3.11+. Runtime dependencies are geopandas, shapely, pyproj,
|
|
73
|
+
pyogrio, fiona, pyarrow, requests, tenacity, platformdirs, filelock, and tqdm.
|
|
74
|
+
|
|
75
|
+
## Quickstart
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from austrata import GADataClient
|
|
79
|
+
from shapely.geometry import box
|
|
80
|
+
|
|
81
|
+
ga = GADataClient() # cache defaults to the OS user cache dir
|
|
82
|
+
|
|
83
|
+
# Boreholes inside a bounding box (lon/lat). Paginated and cached automatically.
|
|
84
|
+
bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1))
|
|
85
|
+
print(len(bores), "boreholes")
|
|
86
|
+
gdf = bores.to_geodataframe() # headers as a GeoDataFrame (EPSG:4283)
|
|
87
|
+
|
|
88
|
+
# Or pass any shapely geometry as the region.
|
|
89
|
+
bores = ga.boreholes(region=box(148.9, -35.6, 149.3, -35.1))
|
|
90
|
+
|
|
91
|
+
# Load downhole logs for the whole collection in one shot (ENO-batched, cached).
|
|
92
|
+
bores.load_logs("stratigraphy")
|
|
93
|
+
for b in bores:
|
|
94
|
+
for interval in b.stratigraphy: # list of StratigraphyInterval
|
|
95
|
+
if interval.valid:
|
|
96
|
+
print(b.name, interval.top_depth, interval.bottom_depth, interval.unit)
|
|
97
|
+
|
|
98
|
+
bores.load_logs("earth_material") # b.earth_material is then populated
|
|
99
|
+
|
|
100
|
+
# Export the loaded logs as a tidy GeoDataFrame (one row per interval, borehole
|
|
101
|
+
# point geometry, EPSG:4283). Save with geopandas: .to_file('x.gpkg') / .to_csv(...).
|
|
102
|
+
strat = bores.stratigraphy_geodataframe()
|
|
103
|
+
earth = bores.earth_material_geodataframe()
|
|
104
|
+
|
|
105
|
+
# A single borehole by ENO or PID.
|
|
106
|
+
one = ga.borehole("35147")
|
|
107
|
+
|
|
108
|
+
# Hydrogeology polygons to overlay, as a GeoDataFrame.
|
|
109
|
+
hydro = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1))
|
|
110
|
+
|
|
111
|
+
# A backend filter passes straight through.
|
|
112
|
+
diamond = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), filter="drillingMethod='Diamond'")
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Dry-run counts
|
|
116
|
+
|
|
117
|
+
Pass `count_only=True` to get the number of features without downloading them
|
|
118
|
+
(uses the cheap `resultType=hits` / `returnCountOnly` paths):
|
|
119
|
+
|
|
120
|
+
```python
|
|
121
|
+
n_bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
|
|
122
|
+
n_units = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Caching, freshness, and offline use
|
|
126
|
+
|
|
127
|
+
Each logical query is cached as a `<hash>.parquet` file plus an entry in a
|
|
128
|
+
`manifest.json`, in an OS-appropriate user cache directory
|
|
129
|
+
(e.g. `~/Library/Caches/austrata` on macOS). Override the location with the
|
|
130
|
+
`cache_dir=` argument or the `AUSTRATA_DATA_DIR` environment variable.
|
|
131
|
+
|
|
132
|
+
On a repeat query `austrata` revalidates rather than blindly refetching: the
|
|
133
|
+
ArcGIS path uses the service `ETag` (conditional `If-None-Match`), and the WFS
|
|
134
|
+
path — which exposes no ETag — compares the `numberMatched` count as a cheap
|
|
135
|
+
fingerprint. Both fall back to a max-age TTL (30 days by default), so a
|
|
136
|
+
same-count content edit is eventually picked up. `force_refresh=True` is the
|
|
137
|
+
only hard guarantee of a fresh pull.
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
ga = GADataClient(offline=True) # never touch the network; serve cache or raise
|
|
141
|
+
ga = GADataClient(max_age=7 * 24 * 3600) # revalidate-by-refetch weekly
|
|
142
|
+
bores = ga.boreholes(bbox=..., force_refresh=True)
|
|
143
|
+
|
|
144
|
+
# Inspect or clear the cache.
|
|
145
|
+
ga.cache.info() # dir, entry count, total bytes, per-entry detail
|
|
146
|
+
ga.cache.list() # cached keys
|
|
147
|
+
ga.cache.clear() # wipe everything (or clear(key) for one)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
To prefetch for offline/field use, run the queries you need once while online;
|
|
151
|
+
they land in the cache and an `offline=True` client serves them thereafter.
|
|
152
|
+
|
|
153
|
+
### Citing the data
|
|
154
|
+
|
|
155
|
+
Geoscience Australia publishes this data under CC BY 4.0. `austrata` records the
|
|
156
|
+
provenance of every cached query so you can cite it with its access date:
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
print(bores.citation()) # citation string incl. "Accessed YYYY-MM-DD"
|
|
160
|
+
bores.provenance() # dict: source_url, license, fetched_at, ...
|
|
161
|
+
|
|
162
|
+
from austrata import hydrogeology_citation
|
|
163
|
+
print(hydrogeology_citation(hydro))
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
## The lon/lat (GDA94) contract
|
|
167
|
+
|
|
168
|
+
Both services are native EPSG:4283 (GDA94 geographic). `austrata` pins this end to
|
|
169
|
+
end: the WFS bbox carries an explicit `EPSG:4283` suffix, and ArcGIS queries
|
|
170
|
+
force `outSR=4283` (its GeoJSON otherwise silently defaults to WGS84). Every
|
|
171
|
+
geometry you get back is lon/lat in GDA94 — no reprojection happens here.
|
|
172
|
+
|
|
173
|
+
## Architecture
|
|
174
|
+
|
|
175
|
+
The package follows clean-architecture / DDD layering (see `DESIGN.md`):
|
|
176
|
+
|
|
177
|
+
- `domain/` — pure value objects and entities (`Region`, `Borehole`,
|
|
178
|
+
`BoreholeCollection`, `StratigraphyInterval`, `EarthMaterialInterval`,
|
|
179
|
+
`HydrogeologyUnit`). No I/O.
|
|
180
|
+
- `ports/` — the interfaces the application layer depends on (`BoreholeSource`,
|
|
181
|
+
`HydrogeologySource`, `DatasetCache`).
|
|
182
|
+
- `application/` — use cases that build per-backend cache fetch-plans.
|
|
183
|
+
- `infrastructure/` — the HTTP client, the WFS and ArcGIS adapters, the feature
|
|
184
|
+
mappers, and the dataset cache.
|
|
185
|
+
- `client.py` — the `GADataClient` facade that wires it together.
|
|
186
|
+
|
|
187
|
+
## License
|
|
188
|
+
|
|
189
|
+
MIT (the code). The data accessed through it is © Geoscience Australia, CC BY 4.0.
|
austrata-0.1.0/README.md
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# austrata
|
|
2
|
+
|
|
3
|
+
[Tests](https://github.com/g-adopt/austrata/actions/workflows/tests.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/austrata/)
|
|
5
|
+
[Python versions](https://pypi.org/project/austrata/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
Access Geoscience Australia borehole and hydrogeology data through their open
|
|
9
|
+
OGC/ArcGIS web services, with a provenance-aware local cache.
|
|
10
|
+
|
|
11
|
+
`austrata` models boreholes as first-class objects (a header plus downhole
|
|
12
|
+
stratigraphy and earth-material logs), lets you pull every bore inside an
|
|
13
|
+
arbitrary polygon or bounding box, and exposes the Hydrogeology of Australia
|
|
14
|
+
polygon layer to overlay. It talks to two backends behind one API: the GA
|
|
15
|
+
boreholes GeoServer (WFS) and the Hydrogeology of Australia ArcGIS MapServer.
|
|
16
|
+
Results are cached locally as GeoParquet with a provenance manifest, and
|
|
17
|
+
revalidated before refetching so repeated queries are cheap and reproducible.
|
|
18
|
+
|
|
19
|
+
Everything is returned in lon/lat (EPSG:4283, GDA94 geographic). Map projection
|
|
20
|
+
and mesh generation are deliberately out of scope — those live in the companion
|
|
21
|
+
`omega` package, which consumes this one.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install -e ".[dev]"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Requires Python 3.11+. Runtime dependencies are geopandas, shapely, pyproj,
|
|
30
|
+
pyogrio, fiona, pyarrow, requests, tenacity, platformdirs, filelock, and tqdm.
|
|
31
|
+
|
|
32
|
+
## Quickstart
|
|
33
|
+
|
|
34
|
+
```python
|
|
35
|
+
from austrata import GADataClient
|
|
36
|
+
from shapely.geometry import box
|
|
37
|
+
|
|
38
|
+
ga = GADataClient() # cache defaults to the OS user cache dir
|
|
39
|
+
|
|
40
|
+
# Boreholes inside a bounding box (lon/lat). Paginated and cached automatically.
|
|
41
|
+
bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1))
|
|
42
|
+
print(len(bores), "boreholes")
|
|
43
|
+
gdf = bores.to_geodataframe() # headers as a GeoDataFrame (EPSG:4283)
|
|
44
|
+
|
|
45
|
+
# Or pass any shapely geometry as the region.
|
|
46
|
+
bores = ga.boreholes(region=box(148.9, -35.6, 149.3, -35.1))
|
|
47
|
+
|
|
48
|
+
# Load downhole logs for the whole collection in one shot (ENO-batched, cached).
|
|
49
|
+
bores.load_logs("stratigraphy")
|
|
50
|
+
for b in bores:
|
|
51
|
+
for interval in b.stratigraphy: # list of StratigraphyInterval
|
|
52
|
+
if interval.valid:
|
|
53
|
+
print(b.name, interval.top_depth, interval.bottom_depth, interval.unit)
|
|
54
|
+
|
|
55
|
+
bores.load_logs("earth_material") # b.earth_material is then populated
|
|
56
|
+
|
|
57
|
+
# Export the loaded logs as a tidy GeoDataFrame (one row per interval, borehole
|
|
58
|
+
# point geometry, EPSG:4283). Save with geopandas: .to_file('x.gpkg') / .to_csv(...).
|
|
59
|
+
strat = bores.stratigraphy_geodataframe()
|
|
60
|
+
earth = bores.earth_material_geodataframe()
|
|
61
|
+
|
|
62
|
+
# A single borehole by ENO or PID.
|
|
63
|
+
one = ga.borehole("35147")
|
|
64
|
+
|
|
65
|
+
# Hydrogeology polygons to overlay, as a GeoDataFrame.
|
|
66
|
+
hydro = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1))
|
|
67
|
+
|
|
68
|
+
# A backend filter passes straight through.
|
|
69
|
+
diamond = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), filter="drillingMethod='Diamond'")
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Dry-run counts
|
|
73
|
+
|
|
74
|
+
Pass `count_only=True` to get the number of features without downloading them
|
|
75
|
+
(uses the cheap `resultType=hits` / `returnCountOnly` paths):
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
n_bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
|
|
79
|
+
n_units = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Caching, freshness, and offline use
|
|
83
|
+
|
|
84
|
+
Each logical query is cached as a `<hash>.parquet` file plus an entry in a
|
|
85
|
+
`manifest.json`, in an OS-appropriate user cache directory
|
|
86
|
+
(e.g. `~/Library/Caches/austrata` on macOS). Override the location with the
|
|
87
|
+
`cache_dir=` argument or the `AUSTRATA_DATA_DIR` environment variable.
|
|
88
|
+
|
|
89
|
+
On a repeat query `austrata` revalidates rather than blindly refetching: the
|
|
90
|
+
ArcGIS path uses the service `ETag` (conditional `If-None-Match`), and the WFS
|
|
91
|
+
path — which exposes no ETag — compares the `numberMatched` count as a cheap
|
|
92
|
+
fingerprint. Both fall back to a max-age TTL (30 days by default), so a
|
|
93
|
+
same-count content edit is eventually picked up. `force_refresh=True` is the
|
|
94
|
+
only hard guarantee of a fresh pull.
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
ga = GADataClient(offline=True) # never touch the network; serve cache or raise
|
|
98
|
+
ga = GADataClient(max_age=7 * 24 * 3600) # revalidate-by-refetch weekly
|
|
99
|
+
bores = ga.boreholes(bbox=..., force_refresh=True)
|
|
100
|
+
|
|
101
|
+
# Inspect or clear the cache.
|
|
102
|
+
ga.cache.info() # dir, entry count, total bytes, per-entry detail
|
|
103
|
+
ga.cache.list() # cached keys
|
|
104
|
+
ga.cache.clear() # wipe everything (or clear(key) for one)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
To prefetch for offline/field use, run the queries you need once while online;
|
|
108
|
+
they land in the cache and an `offline=True` client serves them thereafter.
|
|
109
|
+
|
|
110
|
+
### Citing the data
|
|
111
|
+
|
|
112
|
+
Geoscience Australia publishes this data under CC BY 4.0. `austrata` records the
|
|
113
|
+
provenance of every cached query so you can cite it with its access date:
|
|
114
|
+
|
|
115
|
+
```python
|
|
116
|
+
print(bores.citation()) # citation string incl. "Accessed YYYY-MM-DD"
|
|
117
|
+
bores.provenance() # dict: source_url, license, fetched_at, ...
|
|
118
|
+
|
|
119
|
+
from austrata import hydrogeology_citation
|
|
120
|
+
print(hydrogeology_citation(hydro))
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## The lon/lat (GDA94) contract
|
|
124
|
+
|
|
125
|
+
Both services are native EPSG:4283 (GDA94 geographic). `austrata` pins this end to
|
|
126
|
+
end: the WFS bbox carries an explicit `EPSG:4283` suffix, and ArcGIS queries
|
|
127
|
+
force `outSR=4283` (its GeoJSON otherwise silently defaults to WGS84). Every
|
|
128
|
+
geometry you get back is lon/lat in GDA94 — no reprojection happens here.
|
|
129
|
+
|
|
130
|
+
## Architecture
|
|
131
|
+
|
|
132
|
+
The package follows clean-architecture / DDD layering (see `DESIGN.md`):
|
|
133
|
+
|
|
134
|
+
- `domain/` — pure value objects and entities (`Region`, `Borehole`,
|
|
135
|
+
`BoreholeCollection`, `StratigraphyInterval`, `EarthMaterialInterval`,
|
|
136
|
+
`HydrogeologyUnit`). No I/O.
|
|
137
|
+
- `ports/` — the interfaces the application layer depends on (`BoreholeSource`,
|
|
138
|
+
`HydrogeologySource`, `DatasetCache`).
|
|
139
|
+
- `application/` — use cases that build per-backend cache fetch-plans.
|
|
140
|
+
- `infrastructure/` — the HTTP client, the WFS and ArcGIS adapters, the feature
|
|
141
|
+
mappers, and the dataset cache.
|
|
142
|
+
- `client.py` — the `GADataClient` facade that wires it together.
|
|
143
|
+
|
|
144
|
+
## License
|
|
145
|
+
|
|
146
|
+
MIT (the code). The data accessed through it is © Geoscience Australia, CC BY 4.0.
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""austrata — Geoscience Australia borehole and hydrogeology data access.
|
|
2
|
+
|
|
3
|
+
The main entry point is :class:`GADataClient`. The domain value objects are
|
|
4
|
+
exported too for callers that want to work with the typed model directly.
|
|
5
|
+
"""
|
|
6
|
+
import logging as _logging
|
|
7
|
+
|
|
8
|
+
# Structured logging lives under the "austrata" namespace and is silent by
|
|
9
|
+
# default (a NullHandler), per library best practice. Applications opt in via
|
|
10
|
+
# logging.getLogger("austrata").setLevel(...) and attach their own handler.
|
|
11
|
+
# Configured before submodule imports so their module-level getLogger calls
|
|
12
|
+
# inherit a silenced parent regardless of import order.
|
|
13
|
+
_logging.getLogger("austrata").addHandler(_logging.NullHandler())
|
|
14
|
+
|
|
15
|
+
from austrata.domain.region import Region # noqa: E402
|
|
16
|
+
from austrata.domain.borehole import Borehole, BoreholeCollection # noqa: E402
|
|
17
|
+
from austrata.domain.stratigraphy import StratigraphyInterval, EarthMaterialInterval # noqa: E402
|
|
18
|
+
from austrata.domain.construction import ConstructionInterval # noqa: E402
|
|
19
|
+
from austrata.domain.hydrogeology import HydrogeologyUnit # noqa: E402
|
|
20
|
+
from austrata.client import ( # noqa: E402
|
|
21
|
+
GADataClient,
|
|
22
|
+
hydrogeology_citation,
|
|
23
|
+
hydrogeology_provenance,
|
|
24
|
+
)
|
|
25
|
+
from austrata.ngis_client import NGISClient # noqa: E402
|
|
26
|
+
from austrata.groundwater_client import GroundwaterClient # noqa: E402
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
"GADataClient",
|
|
30
|
+
"NGISClient",
|
|
31
|
+
"GroundwaterClient",
|
|
32
|
+
"Region",
|
|
33
|
+
"Borehole",
|
|
34
|
+
"BoreholeCollection",
|
|
35
|
+
"StratigraphyInterval",
|
|
36
|
+
"EarthMaterialInterval",
|
|
37
|
+
"ConstructionInterval",
|
|
38
|
+
"HydrogeologyUnit",
|
|
39
|
+
"hydrogeology_provenance",
|
|
40
|
+
"hydrogeology_citation",
|
|
41
|
+
]
|
|
42
|
+
|
|
43
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Application layer: use cases that build per-backend cache fetch-plans from the ports."""
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""Use case: build the cache fetch-plan for borehole headers (WFS).
|
|
2
|
+
|
|
3
|
+
The WFS backend emits no ETag/Last-Modified, so freshness uses the cheap
|
|
4
|
+
``resultType=hits`` ``numberMatched`` count as a server fingerprint, ANDed with
|
|
5
|
+
the cache's max-age TTL backstop (a same-count content edit is undetectable by
|
|
6
|
+
count alone — documented best-effort, ``force_refresh`` is the only guarantee).
|
|
7
|
+
|
|
8
|
+
This module knows the *strategy* (fingerprint by count, no conditional headers)
|
|
9
|
+
but not HTTP detail — it composes a :class:`FetchPlan` from the injected
|
|
10
|
+
``BoreholeSource`` adapter.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
from austrata.domain.region import Region
|
|
17
|
+
from austrata.infrastructure.dataset_cache import FetchPlan
|
|
18
|
+
from austrata.infrastructure.feature_mapper import borehole_features_to_gdf
|
|
19
|
+
from austrata.ports.data_source import BoreholeSource
|
|
20
|
+
|
|
21
|
+
# Identity of the borehole backend. Both values are hashed into cache keys and
# recorded in the "service" / "layer" fields of each plan's query description.
WFS_SERVICE = "ga-boreholes-wfs"
WFS_LAYER = "gsmlp:BoreholeView"

# Provenance metadata attached to every cached result fetched from this backend.
WFS_CITATION = (
    "Geoscience Australia Borehole Database, "
    "accessed via the GA Boreholes WFS (gsmlp:BoreholeView)."
)
WFS_LICENSE = "CC BY 4.0"
WFS_SOURCE_URL = "https://services.ga.gov.au/gis/boreholes/wfs"
WFS_SERVICE_VERSION = "WFS 2.0.0"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def header_cache_key(region: Region, cql_filter: Optional[str]) -> str:
    """Return the cache key identifying one borehole-headers query.

    The key is the region's own cache key, a dash, and a short hash of the
    service name, layer name and filter. A missing or empty filter is hashed
    as the empty string.

    Args:
        region: Area of interest; supplies the leading part of the key.
        cql_filter: Optional backend filter expression.

    Returns:
        The combined ``<region key>-<hash>`` string.
    """
    filter_text = cql_filter or ""
    descriptor = f"{WFS_SERVICE}|{WFS_LAYER}|{filter_text}"
    region_part = region.cache_key()
    digest = _short_hash(descriptor)
    return f"{region_part}-{digest}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def build_header_plan(
    source: BoreholeSource, region: Region, cql_filter: Optional[str]
) -> FetchPlan:
    """Assemble the plan the cache follows to fetch or revalidate headers.

    Args:
        source: Adapter providing ``count_headers`` and ``fetch_headers``.
        region: Area of interest; its geometry WKT is recorded in the query.
        cql_filter: Optional backend filter, handed to the adapter unchanged.

    Returns:
        A ``FetchPlan`` holding the fetch, fingerprint and comparison
        callables together with a description of the query.
    """

    def probe_live_count(_stored: dict) -> dict:
        # The stored entry is deliberately ignored: the fingerprint is just
        # the count the server reports right now.
        live_count = source.count_headers(region, cql_filter)
        return {"server_fingerprint": {"numberMatched": live_count}}

    def fingerprints_match(stored: dict, current: dict) -> bool:
        previous = stored.get("server_fingerprint")
        latest = current.get("server_fingerprint")
        return previous == latest

    def download_headers():
        features = source.fetch_headers(region, cql_filter)
        frame = borehole_features_to_gdf(features)
        # Store the number of fetched features in the same shape the probe
        # reports, so the next revalidation can compare the two directly.
        extra = {
            "server_fingerprint": {"numberMatched": len(features)},
            "citation": WFS_CITATION,
            "license": WFS_LICENSE,
            "source_url": WFS_SOURCE_URL,
            "service_version": WFS_SERVICE_VERSION,
        }
        return frame, extra

    query_description = {
        "service": WFS_SERVICE,
        "layer": WFS_LAYER,
        "region_wkt": region.geometry.wkt,
        "filter": cql_filter,
    }
    return FetchPlan(
        fetch_fn=download_headers,
        fingerprint_fn=probe_live_count,
        unchanged_fn=fingerprints_match,
        query=query_description,
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def log_cache_key(kind: str, enos) -> str:
    """Return the cache key for a downhole-log pull.

    The ENOs are converted to ``int``, de-duplicated and sorted before being
    hashed, so the key does not depend on their order or on repeats.

    Args:
        kind: Log kind; it appears verbatim in the key and in the hashed text.
        enos: Iterable of values convertible with ``int()``.

    Returns:
        A string of the form ``logs-<kind>-<hash>``.
    """
    eno_text = ",".join(str(eno) for eno in sorted(set(map(int, enos))))
    descriptor = f"{WFS_SERVICE}|logs|{kind}|{eno_text}"
    digest = _short_hash(descriptor)
    return f"logs-{kind}-{digest}"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def build_log_plan(source: BoreholeSource, kind: str, enos) -> FetchPlan:
    """Assemble the FetchPlan for pulling logs for a set of ENOs.

    No fingerprint or comparison callable is supplied, because no cheap
    per-ENO fingerprint exists; the plan carries only the fetch callable and
    the query description.

    Args:
        source: Adapter providing ``fetch_stratigraphy`` and
            ``fetch_earth_material``.
        kind: ``"stratigraphy"`` or ``"earth_material"``. Any other value
            makes the plan's fetch callable raise ``ValueError`` when run.
        enos: Iterable of values convertible with ``int()``; de-duplicated
            and sorted before use.

    Returns:
        A ``FetchPlan`` whose fetch callable returns the mapped log table and
        its provenance metadata.
    """
    from austrata.infrastructure.feature_mapper import log_features_to_dataframe

    eno_ids = sorted(set(map(int, enos)))

    def download_logs():
        # Choose the adapter call for this kind; unknown kinds fail here,
        # when the fetch actually runs, not when the plan is built.
        if kind == "stratigraphy":
            pull = source.fetch_stratigraphy
        elif kind == "earth_material":
            pull = source.fetch_earth_material
        else:
            raise ValueError(f"Unknown log kind {kind!r}.")
        table = log_features_to_dataframe(pull(eno_ids))
        extra = {
            "citation": WFS_CITATION,
            "license": WFS_LICENSE,
            "source_url": WFS_SOURCE_URL,
            "service_version": WFS_SERVICE_VERSION,
        }
        return table, extra

    query_description = {
        "service": WFS_SERVICE,
        "layer": f"logs:{kind}",
        "enos": eno_ids,
    }
    return FetchPlan(fetch_fn=download_logs, query=query_description)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _short_hash(text: str) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``text`` (UTF-8)."""
    from hashlib import sha256

    full_digest = sha256(text.encode("utf-8")).hexdigest()
    return full_digest[:12]
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
"""Use case: build the cache fetch-plan for hydrogeology polygons (ArcGIS).
|
|
2
|
+
|
|
3
|
+
The ArcGIS backend emits an ETag, so freshness uses a conditional probe: send
|
|
4
|
+
``If-None-Match`` with the stored etag and treat a 304 as fresh. The probe is a
|
|
5
|
+
cheap ``returnCountOnly`` query the adapter owns (``probe_etag``), so this module
|
|
6
|
+
stays HTTP-free and the cache stays backend-agnostic.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Optional, Protocol
|
|
11
|
+
|
|
12
|
+
from austrata.domain.region import Region
|
|
13
|
+
from austrata.infrastructure.dataset_cache import FetchPlan
|
|
14
|
+
from austrata.infrastructure.feature_mapper import hydrogeology_features_to_gdf
|
|
15
|
+
|
|
16
|
+
# Identity of the hydrogeology backend. Both values are hashed into cache keys
# and recorded in the "service" / "layer" fields of the plan's query description.
ARCGIS_SERVICE = "ga-hydrogeology-arcgis"
ARCGIS_LAYER = "Hydrogeology_of_Australia/0"

# Provenance metadata attached to every cached result fetched from this backend.
ARCGIS_CITATION = (
    "Hydrogeology of Australia, Geoscience Australia, "
    "accessed via the Hydrogeology_of_Australia ArcGIS MapServer (layer 0)."
)
ARCGIS_LICENSE = "CC BY 4.0"
ARCGIS_SOURCE_URL = (
    "https://services.ga.gov.au/gis/rest/services"
    "/Hydrogeology_of_Australia/MapServer"
)
ARCGIS_SERVICE_VERSION = "ArcGIS REST MapServer"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class HydrogeologyProbeSource(Protocol):
    """Structural type for the adapter this module's fetch-plan relies on.

    An adapter must be able to fetch hydrogeology units for a region and to
    run the conditional ETag probe used for revalidation.
    """

    def fetch_units(
        self,
        region: Region,
        where: Optional[str] = None,
    ) -> list:
        """Return the features for ``region``, optionally restricted by ``where``."""
        ...

    def probe_etag(
        self,
        region: Region,
        where: Optional[str] = None,
        etag: Optional[str] = None,
    ) -> dict:
        """Probe the service, optionally conditional on a stored ``etag``.

        The fetch-plan reads the ``"etag"`` and ``"not_modified"`` keys of
        the returned dict.
        """
        ...
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def hydrogeology_cache_key(region: Region, where: Optional[str]) -> str:
    """Return the cache key identifying one hydrogeology query.

    The key is the region's own cache key, a dash, and a short hash of the
    service name, layer name and ``where`` clause. A missing or empty clause
    is hashed as the empty string.

    Args:
        region: Area of interest; supplies the leading part of the key.
        where: Optional backend filter clause.

    Returns:
        The combined ``<region key>-<hash>`` string.
    """
    where_text = where or ""
    descriptor = f"{ARCGIS_SERVICE}|{ARCGIS_LAYER}|{where_text}"
    region_part = region.cache_key()
    digest = _short_hash(descriptor)
    return f"{region_part}-{digest}"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def build_hydrogeology_plan(
    source: HydrogeologyProbeSource, region: Region, where: Optional[str]
) -> FetchPlan:
    """Compose the FetchPlan the cache uses to fetch/revalidate hydrogeology.

    Args:
        source: Adapter providing ``fetch_units`` and ``probe_etag``.
        region: Area of interest; its geometry WKT is recorded in the query.
        where: Optional backend filter clause, handed to the adapter unchanged.

    Returns:
        A ``FetchPlan`` holding the fetch, fingerprint, comparison and
        conditional-header callables together with a description of the query.
    """

    def conditional_headers(stored: dict) -> dict:
        # Only send If-None-Match when a previous fetch recorded an etag.
        etag = stored.get("etag")
        return {"If-None-Match": etag} if etag else {}

    def fingerprint(stored: dict) -> dict:
        # A 304 here proves freshness; otherwise we carry the new etag forward.
        return source.probe_etag(region, where, etag=stored.get("etag"))

    def unchanged(stored: dict, current: dict) -> bool:
        if current.get("not_modified"):
            return True
        # No 304: fall back to comparing etags, treating a missing current
        # etag as "changed" so two absent etags never count as a match.
        cur_etag = current.get("etag")
        return cur_etag is not None and cur_etag == stored.get("etag")

    def fetch():
        # Capture the etag BEFORE downloading the features. If the service
        # changes between the two requests, the stored etag is then older
        # than the payload, which costs at most one extra refetch. Probing
        # afterwards could store an etag newer than the cached payload, and a
        # later 304 would then validate stale data.
        probe = source.probe_etag(region, where)
        features = source.fetch_units(region, where)
        gdf = hydrogeology_features_to_gdf(features)
        provenance_extra = {
            "etag": probe.get("etag"),
            "citation": ARCGIS_CITATION,
            "license": ARCGIS_LICENSE,
            "source_url": ARCGIS_SOURCE_URL,
            "service_version": ARCGIS_SERVICE_VERSION,
        }
        return gdf, provenance_extra

    return FetchPlan(
        fetch_fn=fetch,
        fingerprint_fn=fingerprint,
        unchanged_fn=unchanged,
        conditional_headers_fn=conditional_headers,
        query={
            "service": ARCGIS_SERVICE,
            "layer": ARCGIS_LAYER,
            "region_wkt": region.geometry.wkt,
            "filter": where,
        },
    )
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _short_hash(text: str) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``text`` (UTF-8)."""
    from hashlib import sha256

    full_digest = sha256(text.encode("utf-8")).hexdigest()
    return full_digest[:12]
|