austrata 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. austrata-0.1.0/LICENSE +21 -0
  2. austrata-0.1.0/PKG-INFO +189 -0
  3. austrata-0.1.0/README.md +146 -0
  4. austrata-0.1.0/austrata/__init__.py +43 -0
  5. austrata-0.1.0/austrata/application/__init__.py +1 -0
  6. austrata-0.1.0/austrata/application/fetch_boreholes.py +119 -0
  7. austrata-0.1.0/austrata/application/fetch_hydrogeology.py +93 -0
  8. austrata-0.1.0/austrata/application/fetch_ngis.py +175 -0
  9. austrata-0.1.0/austrata/client.py +178 -0
  10. austrata-0.1.0/austrata/domain/__init__.py +16 -0
  11. austrata-0.1.0/austrata/domain/borehole.py +421 -0
  12. austrata-0.1.0/austrata/domain/coercion.py +37 -0
  13. austrata-0.1.0/austrata/domain/construction.py +78 -0
  14. austrata-0.1.0/austrata/domain/hydrogeology.py +40 -0
  15. austrata-0.1.0/austrata/domain/region.py +145 -0
  16. austrata-0.1.0/austrata/domain/stratigraphy.py +167 -0
  17. austrata-0.1.0/austrata/groundwater_client.py +159 -0
  18. austrata-0.1.0/austrata/infrastructure/__init__.py +1 -0
  19. austrata-0.1.0/austrata/infrastructure/arcgis_rest_client.py +155 -0
  20. austrata-0.1.0/austrata/infrastructure/dataset_cache.py +359 -0
  21. austrata-0.1.0/austrata/infrastructure/feature_mapper.py +141 -0
  22. austrata-0.1.0/austrata/infrastructure/http.py +163 -0
  23. austrata-0.1.0/austrata/infrastructure/ngis_download.py +216 -0
  24. austrata-0.1.0/austrata/infrastructure/ngis_mapper.py +198 -0
  25. austrata-0.1.0/austrata/infrastructure/ngis_optimiser.py +191 -0
  26. austrata-0.1.0/austrata/infrastructure/ngis_sources.py +157 -0
  27. austrata-0.1.0/austrata/infrastructure/ogc_wfs_client.py +218 -0
  28. austrata-0.1.0/austrata/ngis_client.py +148 -0
  29. austrata-0.1.0/austrata/ports/__init__.py +1 -0
  30. austrata-0.1.0/austrata/ports/cache.py +64 -0
  31. austrata-0.1.0/austrata/ports/data_source.py +59 -0
  32. austrata-0.1.0/austrata/py.typed +0 -0
  33. austrata-0.1.0/austrata.egg-info/PKG-INFO +189 -0
  34. austrata-0.1.0/austrata.egg-info/SOURCES.txt +56 -0
  35. austrata-0.1.0/austrata.egg-info/dependency_links.txt +1 -0
  36. austrata-0.1.0/austrata.egg-info/requires.txt +21 -0
  37. austrata-0.1.0/austrata.egg-info/top_level.txt +1 -0
  38. austrata-0.1.0/pyproject.toml +96 -0
  39. austrata-0.1.0/setup.cfg +4 -0
  40. austrata-0.1.0/tests/test_arcgis_client.py +126 -0
  41. austrata-0.1.0/tests/test_arcgis_live.py +48 -0
  42. austrata-0.1.0/tests/test_borehole.py +222 -0
  43. austrata-0.1.0/tests/test_client.py +237 -0
  44. austrata-0.1.0/tests/test_client_live.py +53 -0
  45. austrata-0.1.0/tests/test_construction.py +58 -0
  46. austrata-0.1.0/tests/test_dataset_cache.py +239 -0
  47. austrata-0.1.0/tests/test_fetch_ngis.py +186 -0
  48. austrata-0.1.0/tests/test_ga_server_live.py +715 -0
  49. austrata-0.1.0/tests/test_groundwater_client.py +257 -0
  50. austrata-0.1.0/tests/test_http.py +79 -0
  51. austrata-0.1.0/tests/test_intervals.py +116 -0
  52. austrata-0.1.0/tests/test_ngis_client.py +280 -0
  53. austrata-0.1.0/tests/test_ngis_download.py +119 -0
  54. austrata-0.1.0/tests/test_ngis_live.py +156 -0
  55. austrata-0.1.0/tests/test_ngis_optimiser.py +171 -0
  56. austrata-0.1.0/tests/test_region.py +85 -0
  57. austrata-0.1.0/tests/test_wfs_client.py +106 -0
  58. austrata-0.1.0/tests/test_wfs_live.py +51 -0
austrata-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sia Ghelichkhan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,189 @@
1
+ Metadata-Version: 2.4
2
+ Name: austrata
3
+ Version: 0.1.0
4
+ Summary: Access Geoscience Australia borehole and hydrogeology data with a provenance-aware local cache
5
+ Author-email: Sia Ghelichkhan <siavash.ghelichkhan@anu.edu.au>
6
+ License: MIT
7
+ Project-URL: Repository, https://github.com/g-adopt/austrata
8
+ Project-URL: Issues, https://github.com/g-adopt/austrata/issues
9
+ Keywords: geoscience australia,boreholes,hydrogeology,stratigraphy,geospatial
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Topic :: Scientific/Engineering
18
+ Classifier: Topic :: Scientific/Engineering :: GIS
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: geopandas>=0.14
23
+ Requires-Dist: shapely>=2.0
24
+ Requires-Dist: pyproj>=3.5
25
+ Requires-Dist: pyogrio>=0.7
26
+ Requires-Dist: fiona>=1.9
27
+ Requires-Dist: pyarrow>=14.0
28
+ Requires-Dist: requests>=2.28
29
+ Requires-Dist: tenacity>=8.2
30
+ Requires-Dist: platformdirs>=3.0
31
+ Requires-Dist: filelock>=3.12
32
+ Requires-Dist: tqdm>=4.64
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=3.0; extra == "dev"
36
+ Requires-Dist: vcrpy>=5.0; extra == "dev"
37
+ Requires-Dist: responses>=0.23; extra == "dev"
38
+ Requires-Dist: flake8>=5.0; extra == "dev"
39
+ Requires-Dist: black>=22.0; extra == "dev"
40
+ Requires-Dist: mypy>=0.990; extra == "dev"
41
+ Requires-Dist: types-requests; extra == "dev"
42
+ Dynamic: license-file
43
+
44
+ # austrata
45
+
46
+ [![Tests](https://github.com/g-adopt/austrata/actions/workflows/tests.yml/badge.svg)](https://github.com/g-adopt/austrata/actions/workflows/tests.yml)
47
+ [![PyPI](https://img.shields.io/pypi/v/austrata.svg)](https://pypi.org/project/austrata/)
48
+ [![Python](https://img.shields.io/pypi/pyversions/austrata.svg)](https://pypi.org/project/austrata/)
49
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
50
+
51
+ Access Geoscience Australia borehole and hydrogeology data through their open
52
+ OGC/ArcGIS web services, with a provenance-aware local cache.
53
+
54
+ `austrata` models boreholes as first-class objects (a header plus downhole
55
+ stratigraphy and earth-material logs), lets you pull every bore inside an
56
+ arbitrary polygon or bounding box, and exposes the Hydrogeology of Australia
57
+ polygon layer to overlay. It talks to two backends behind one API: the GA
58
+ boreholes GeoServer (WFS) and the Hydrogeology of Australia ArcGIS MapServer.
59
+ Results are cached locally as GeoParquet with a provenance manifest, and
60
+ revalidated before refetching so repeated queries are cheap and reproducible.
61
+
62
+ Everything is returned in lon/lat (EPSG:4283, GDA94 geographic). Map projection
63
+ and mesh generation are deliberately out of scope — those live in the companion
64
+ `omega` package, which consumes this one.
65
+
66
+ ## Installation
67
+
68
+ ```bash
69
+ pip install -e ".[dev]"
70
+ ```
71
+
72
+ Requires Python 3.11+. Runtime dependencies are geopandas, shapely, pyproj,
73
+ pyogrio, fiona, pyarrow, requests, tenacity, platformdirs, filelock, and tqdm.
74
+
75
+ ## Quickstart
76
+
77
+ ```python
78
+ from austrata import GADataClient
79
+ from shapely.geometry import box
80
+
81
+ ga = GADataClient() # cache defaults to the OS user cache dir
82
+
83
+ # Boreholes inside a bounding box (lon/lat). Paginated and cached automatically.
84
+ bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1))
85
+ print(len(bores), "boreholes")
86
+ gdf = bores.to_geodataframe() # headers as a GeoDataFrame (EPSG:4283)
87
+
88
+ # Or pass any shapely geometry as the region.
89
+ bores = ga.boreholes(region=box(148.9, -35.6, 149.3, -35.1))
90
+
91
+ # Load downhole logs for the whole collection in one shot (ENO-batched, cached).
92
+ bores.load_logs("stratigraphy")
93
+ for b in bores:
94
+ for interval in b.stratigraphy: # list of StratigraphyInterval
95
+ if interval.valid:
96
+ print(b.name, interval.top_depth, interval.bottom_depth, interval.unit)
97
+
98
+ bores.load_logs("earth_material") # b.earth_material is then populated
99
+
100
+ # Export the loaded logs as a tidy GeoDataFrame (one row per interval, borehole
101
+ # point geometry, EPSG:4283). Save with geopandas: .to_file('x.gpkg') / .to_csv(...).
102
+ strat = bores.stratigraphy_geodataframe()
103
+ earth = bores.earth_material_geodataframe()
104
+
105
+ # A single borehole by ENO or PID.
106
+ one = ga.borehole("35147")
107
+
108
+ # Hydrogeology polygons to overlay, as a GeoDataFrame.
109
+ hydro = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1))
110
+
111
+ # A backend filter passes straight through.
112
+ diamond = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), filter="drillingMethod='Diamond'")
113
+ ```
114
+
115
+ ### Dry-run counts
116
+
117
+ Pass `count_only=True` to get the number of features without downloading them
118
+ (uses the cheap `resultType=hits` / `returnCountOnly` paths):
119
+
120
+ ```python
121
+ n_bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
122
+ n_units = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
123
+ ```
124
+
125
+ ### Caching, freshness, and offline use
126
+
127
+ Each logical query is cached as a `<hash>.parquet` file plus an entry in a
128
+ `manifest.json`, in an OS-appropriate user cache directory
129
+ (e.g. `~/Library/Caches/austrata` on macOS). Override the location with the
130
+ `cache_dir=` argument or the `AUSTRATA_DATA_DIR` environment variable.
131
+
132
+ On a repeat query `austrata` revalidates rather than blindly refetching: the
133
+ ArcGIS path uses the service `ETag` (conditional `If-None-Match`), and the WFS
134
+ path — which exposes no ETag — compares the `numberMatched` count as a cheap
135
+ fingerprint. Both fall back to a max-age TTL (30 days by default), so a
136
+ same-count content edit is eventually picked up. `force_refresh=True` is the
137
+ only hard guarantee of a fresh pull.
138
+
139
+ ```python
140
+ ga = GADataClient(offline=True) # never touch the network; serve cache or raise
141
+ ga = GADataClient(max_age=7 * 24 * 3600) # revalidate-by-refetch weekly
142
+ bores = ga.boreholes(bbox=..., force_refresh=True)
143
+
144
+ # Inspect or clear the cache.
145
+ ga.cache.info() # dir, entry count, total bytes, per-entry detail
146
+ ga.cache.list() # cached keys
147
+ ga.cache.clear() # wipe everything (or clear(key) for one)
148
+ ```
149
+
150
+ To prefetch for offline/field use, run the queries you need once while online;
151
+ they land in the cache and an `offline=True` client serves them thereafter.
152
+
153
+ ### Citing the data
154
+
155
+ Geoscience Australia publishes this data under CC BY 4.0. `austrata` records the
156
+ provenance of every cached query so you can cite it with its access date:
157
+
158
+ ```python
159
+ print(bores.citation()) # citation string incl. "Accessed YYYY-MM-DD"
160
+ bores.provenance() # dict: source_url, license, fetched_at, ...
161
+
162
+ from austrata import hydrogeology_citation
163
+ print(hydrogeology_citation(hydro))
164
+ ```
165
+
166
+ ## The lon/lat (GDA94) contract
167
+
168
+ Both services are native EPSG:4283 (GDA94 geographic). `austrata` pins this end to
169
+ end: the WFS bbox carries an explicit `EPSG:4283` suffix, and ArcGIS queries
170
+ force `outSR=4283` (its GeoJSON otherwise silently defaults to WGS84). Every
171
+ geometry you get back is lon/lat in GDA94 — no reprojection happens here.
172
+
173
+ ## Architecture
174
+
175
+ The package follows clean-architecture / DDD layering (see `DESIGN.md`):
176
+
177
+ - `domain/` — pure value objects and entities (`Region`, `Borehole`,
178
+ `BoreholeCollection`, `StratigraphyInterval`, `EarthMaterialInterval`,
179
+ `HydrogeologyUnit`). No I/O.
180
+ - `ports/` — the interfaces the application layer depends on (`BoreholeSource`,
181
+ `HydrogeologySource`, `DatasetCache`).
182
+ - `application/` — use cases that build per-backend cache fetch-plans.
183
+ - `infrastructure/` — the HTTP client, the WFS and ArcGIS adapters, the feature
184
+ mappers, and the dataset cache.
185
+ - `client.py` — the `GADataClient` facade that wires it together.
186
+
187
+ ## License
188
+
189
+ MIT (the code). The data accessed through it is © Geoscience Australia, CC BY 4.0.
@@ -0,0 +1,146 @@
1
+ # austrata
2
+
3
+ [![Tests](https://github.com/g-adopt/austrata/actions/workflows/tests.yml/badge.svg)](https://github.com/g-adopt/austrata/actions/workflows/tests.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/austrata.svg)](https://pypi.org/project/austrata/)
5
+ [![Python](https://img.shields.io/pypi/pyversions/austrata.svg)](https://pypi.org/project/austrata/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
7
+
8
+ Access Geoscience Australia borehole and hydrogeology data through their open
9
+ OGC/ArcGIS web services, with a provenance-aware local cache.
10
+
11
+ `austrata` models boreholes as first-class objects (a header plus downhole
12
+ stratigraphy and earth-material logs), lets you pull every bore inside an
13
+ arbitrary polygon or bounding box, and exposes the Hydrogeology of Australia
14
+ polygon layer to overlay. It talks to two backends behind one API: the GA
15
+ boreholes GeoServer (WFS) and the Hydrogeology of Australia ArcGIS MapServer.
16
+ Results are cached locally as GeoParquet with a provenance manifest, and
17
+ revalidated before refetching so repeated queries are cheap and reproducible.
18
+
19
+ Everything is returned in lon/lat (EPSG:4283, GDA94 geographic). Map projection
20
+ and mesh generation are deliberately out of scope — those live in the companion
21
+ `omega` package, which consumes this one.
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ pip install -e ".[dev]"
27
+ ```
28
+
29
+ Requires Python 3.11+. Runtime dependencies are geopandas, shapely, pyproj,
30
+ pyogrio, fiona, pyarrow, requests, tenacity, platformdirs, filelock, and tqdm.
31
+
32
+ ## Quickstart
33
+
34
+ ```python
35
+ from austrata import GADataClient
36
+ from shapely.geometry import box
37
+
38
+ ga = GADataClient() # cache defaults to the OS user cache dir
39
+
40
+ # Boreholes inside a bounding box (lon/lat). Paginated and cached automatically.
41
+ bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1))
42
+ print(len(bores), "boreholes")
43
+ gdf = bores.to_geodataframe() # headers as a GeoDataFrame (EPSG:4283)
44
+
45
+ # Or pass any shapely geometry as the region.
46
+ bores = ga.boreholes(region=box(148.9, -35.6, 149.3, -35.1))
47
+
48
+ # Load downhole logs for the whole collection in one shot (ENO-batched, cached).
49
+ bores.load_logs("stratigraphy")
50
+ for b in bores:
51
+ for interval in b.stratigraphy: # list of StratigraphyInterval
52
+ if interval.valid:
53
+ print(b.name, interval.top_depth, interval.bottom_depth, interval.unit)
54
+
55
+ bores.load_logs("earth_material") # b.earth_material is then populated
56
+
57
+ # Export the loaded logs as a tidy GeoDataFrame (one row per interval, borehole
58
+ # point geometry, EPSG:4283). Save with geopandas: .to_file('x.gpkg') / .to_csv(...).
59
+ strat = bores.stratigraphy_geodataframe()
60
+ earth = bores.earth_material_geodataframe()
61
+
62
+ # A single borehole by ENO or PID.
63
+ one = ga.borehole("35147")
64
+
65
+ # Hydrogeology polygons to overlay, as a GeoDataFrame.
66
+ hydro = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1))
67
+
68
+ # A backend filter passes straight through.
69
+ diamond = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), filter="drillingMethod='Diamond'")
70
+ ```
71
+
72
+ ### Dry-run counts
73
+
74
+ Pass `count_only=True` to get the number of features without downloading them
75
+ (uses the cheap `resultType=hits` / `returnCountOnly` paths):
76
+
77
+ ```python
78
+ n_bores = ga.boreholes(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
79
+ n_units = ga.hydrogeology(bbox=(148.9, -35.6, 149.3, -35.1), count_only=True)
80
+ ```
81
+
82
+ ### Caching, freshness, and offline use
83
+
84
+ Each logical query is cached as a `<hash>.parquet` file plus an entry in a
85
+ `manifest.json`, in an OS-appropriate user cache directory
86
+ (e.g. `~/Library/Caches/austrata` on macOS). Override the location with the
87
+ `cache_dir=` argument or the `AUSTRATA_DATA_DIR` environment variable.
88
+
89
+ On a repeat query `austrata` revalidates rather than blindly refetching: the
90
+ ArcGIS path uses the service `ETag` (conditional `If-None-Match`), and the WFS
91
+ path — which exposes no ETag — compares the `numberMatched` count as a cheap
92
+ fingerprint. Both fall back to a max-age TTL (30 days by default), so a
93
+ same-count content edit is eventually picked up. `force_refresh=True` is the
94
+ only hard guarantee of a fresh pull.
95
+
96
+ ```python
97
+ ga = GADataClient(offline=True) # never touch the network; serve cache or raise
98
+ ga = GADataClient(max_age=7 * 24 * 3600) # revalidate-by-refetch weekly
99
+ bores = ga.boreholes(bbox=..., force_refresh=True)
100
+
101
+ # Inspect or clear the cache.
102
+ ga.cache.info() # dir, entry count, total bytes, per-entry detail
103
+ ga.cache.list() # cached keys
104
+ ga.cache.clear() # wipe everything (or clear(key) for one)
105
+ ```
106
+
107
+ To prefetch for offline/field use, run the queries you need once while online;
108
+ they land in the cache and an `offline=True` client serves them thereafter.
109
+
110
+ ### Citing the data
111
+
112
+ Geoscience Australia publishes this data under CC BY 4.0. `austrata` records the
113
+ provenance of every cached query so you can cite it with its access date:
114
+
115
+ ```python
116
+ print(bores.citation()) # citation string incl. "Accessed YYYY-MM-DD"
117
+ bores.provenance() # dict: source_url, license, fetched_at, ...
118
+
119
+ from austrata import hydrogeology_citation
120
+ print(hydrogeology_citation(hydro))
121
+ ```
122
+
123
+ ## The lon/lat (GDA94) contract
124
+
125
+ Both services are native EPSG:4283 (GDA94 geographic). `austrata` pins this end to
126
+ end: the WFS bbox carries an explicit `EPSG:4283` suffix, and ArcGIS queries
127
+ force `outSR=4283` (its GeoJSON otherwise silently defaults to WGS84). Every
128
+ geometry you get back is lon/lat in GDA94 — no reprojection happens here.
129
+
130
+ ## Architecture
131
+
132
+ The package follows clean-architecture / DDD layering (see `DESIGN.md`):
133
+
134
+ - `domain/` — pure value objects and entities (`Region`, `Borehole`,
135
+ `BoreholeCollection`, `StratigraphyInterval`, `EarthMaterialInterval`,
136
+ `HydrogeologyUnit`). No I/O.
137
+ - `ports/` — the interfaces the application layer depends on (`BoreholeSource`,
138
+ `HydrogeologySource`, `DatasetCache`).
139
+ - `application/` — use cases that build per-backend cache fetch-plans.
140
+ - `infrastructure/` — the HTTP client, the WFS and ArcGIS adapters, the feature
141
+ mappers, and the dataset cache.
142
+ - `client.py` — the `GADataClient` facade that wires it together.
143
+
144
+ ## License
145
+
146
+ MIT (the code). The data accessed through it is © Geoscience Australia, CC BY 4.0.
@@ -0,0 +1,43 @@
1
+ """austrata — Geoscience Australia borehole and hydrogeology data access.
2
+
3
+ The main entry point is :class:`GADataClient`. The domain value objects are
4
+ exported too for callers that want to work with the typed model directly.
5
+ """
6
+ import logging as _logging
7
+
8
+ # Structured logging lives under the "austrata" namespace and is silent by
9
+ # default (a NullHandler), per library best practice. Applications opt in via
10
+ # logging.getLogger("austrata").setLevel(...) and attach their own handler.
11
+ # Configured before submodule imports so their module-level getLogger calls
12
+ # inherit a silenced parent regardless of import order.
13
+ _logging.getLogger("austrata").addHandler(_logging.NullHandler())
14
+
15
+ from austrata.domain.region import Region # noqa: E402
16
+ from austrata.domain.borehole import Borehole, BoreholeCollection # noqa: E402
17
+ from austrata.domain.stratigraphy import StratigraphyInterval, EarthMaterialInterval # noqa: E402
18
+ from austrata.domain.construction import ConstructionInterval # noqa: E402
19
+ from austrata.domain.hydrogeology import HydrogeologyUnit # noqa: E402
20
+ from austrata.client import ( # noqa: E402
21
+ GADataClient,
22
+ hydrogeology_citation,
23
+ hydrogeology_provenance,
24
+ )
25
+ from austrata.ngis_client import NGISClient # noqa: E402
26
+ from austrata.groundwater_client import GroundwaterClient # noqa: E402
27
+
28
+ __all__ = [
29
+ "GADataClient",
30
+ "NGISClient",
31
+ "GroundwaterClient",
32
+ "Region",
33
+ "Borehole",
34
+ "BoreholeCollection",
35
+ "StratigraphyInterval",
36
+ "EarthMaterialInterval",
37
+ "ConstructionInterval",
38
+ "HydrogeologyUnit",
39
+ "hydrogeology_provenance",
40
+ "hydrogeology_citation",
41
+ ]
42
+
43
+ __version__ = "0.1.0"
@@ -0,0 +1 @@
1
+ """Application layer: use cases that build per-backend cache fetch-plans."""
@@ -0,0 +1,119 @@
1
+ """Use case: build the cache fetch-plan for borehole headers (WFS).
2
+
3
+ The WFS backend emits no ETag/Last-Modified, so freshness uses the cheap
4
+ ``resultType=hits`` ``numberMatched`` count as a server fingerprint, ANDed with
5
+ the cache's max-age TTL backstop (a same-count content edit is undetectable by
6
+ count alone — documented best-effort, ``force_refresh`` is the only guarantee).
7
+
8
+ This module knows the *strategy* (fingerprint by count, no conditional headers)
9
+ but not HTTP detail — it composes a :class:`FetchPlan` from the injected
10
+ ``BoreholeSource`` adapter.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from typing import Optional
15
+
16
+ from austrata.domain.region import Region
17
+ from austrata.infrastructure.dataset_cache import FetchPlan
18
+ from austrata.infrastructure.feature_mapper import borehole_features_to_gdf
19
+ from austrata.ports.data_source import BoreholeSource
20
+
21
# Backend identity: hashed into cache keys and recorded in each plan's query.
WFS_SERVICE = "ga-boreholes-wfs"
WFS_LAYER = "gsmlp:BoreholeView"

# Provenance fields returned alongside the data by the fetch functions below.
WFS_CITATION = (
    "Geoscience Australia Borehole Database, "
    "accessed via the GA Boreholes WFS (gsmlp:BoreholeView)."
)
WFS_LICENSE = "CC BY 4.0"
WFS_SOURCE_URL = "https://services.ga.gov.au/gis/boreholes/wfs"
WFS_SERVICE_VERSION = "WFS 2.0.0"
30
+
31
+
32
def header_cache_key(region: Region, cql_filter: Optional[str]) -> str:
    """Return the cache key identifying one borehole-headers query.

    The key is the region's own cache key, a ``-``, and a short hash of the
    service name, layer name and CQL filter. ``None`` and the empty string
    hash identically, so both mean "no filter".
    """
    filter_part = cql_filter if cql_filter else ""
    query_identity = f"{WFS_SERVICE}|{WFS_LAYER}|{filter_part}"
    region_part = region.cache_key()
    return f"{region_part}-{_short_hash(query_identity)}"
36
+
37
+
38
def build_header_plan(
    source: BoreholeSource, region: Region, cql_filter: Optional[str]
) -> FetchPlan:
    """Build the FetchPlan used to fetch and revalidate borehole headers.

    Args:
        source: Adapter providing ``count_headers`` and ``fetch_headers``.
        region: Region the headers are requested for.
        cql_filter: Optional filter passed unchanged to the adapter.

    Returns:
        A ``FetchPlan`` carrying the fetch, fingerprint and comparison
        callbacks plus a description of the query.
    """
    # Describe the query up front; this reads the region geometry once, at
    # plan-build time.
    query_description = {
        "service": WFS_SERVICE,
        "layer": WFS_LAYER,
        "region_wkt": region.geometry.wkt,
        "filter": cql_filter,
    }

    def _live_count_fingerprint(_stored: dict) -> dict:
        # The fingerprint is the live feature count. The stored entry is
        # ignored: there is no conditional header to replay for this backend.
        live_count = source.count_headers(region, cql_filter)
        return {"server_fingerprint": {"numberMatched": live_count}}

    def _same_fingerprint(stored: dict, current: dict) -> bool:
        previous = stored.get("server_fingerprint")
        latest = current.get("server_fingerprint")
        return previous == latest

    def _fetch_headers():
        features = source.fetch_headers(region, cql_filter)
        frame = borehole_features_to_gdf(features)
        # Record the fetched count under the same key the fingerprint probe
        # uses, so the next revalidation compares like with like.
        extras = {
            "server_fingerprint": {"numberMatched": len(features)},
            "citation": WFS_CITATION,
            "license": WFS_LICENSE,
            "source_url": WFS_SOURCE_URL,
            "service_version": WFS_SERVICE_VERSION,
        }
        return frame, extras

    return FetchPlan(
        fetch_fn=_fetch_headers,
        fingerprint_fn=_live_count_fingerprint,
        unchanged_fn=_same_fingerprint,
        query=query_description,
    )
74
+
75
+
76
def log_cache_key(kind: str, enos) -> str:
    """Return the cache key for a log pull of ``kind`` over the given ENOs.

    ENOs are coerced to ``int``, de-duplicated and sorted before hashing, so
    the key does not depend on the order or repetition of ``enos``.
    """
    ordered_enos = sorted(set(map(int, enos)))
    eno_csv = ",".join(str(eno) for eno in ordered_enos)
    digest = _short_hash(f"{WFS_SERVICE}|logs|{kind}|{eno_csv}")
    return f"logs-{kind}-{digest}"
81
+
82
+
83
def build_log_plan(source: BoreholeSource, kind: str, enos) -> FetchPlan:
    """Build the FetchPlan for a downhole-log pull over a set of ENOs.

    No fingerprint or comparison callback is supplied; only the fetch
    callback and the query description are passed to ``FetchPlan``.

    Args:
        source: Adapter providing ``fetch_stratigraphy`` and
            ``fetch_earth_material``.
        kind: ``"stratigraphy"`` or ``"earth_material"``. Any other value
            raises ``ValueError`` when the plan's fetch callback runs, not
            when the plan is built.
        enos: Iterable of ENOs; each is coerced to ``int``, de-duplicated
            and sorted.

    Returns:
        A ``FetchPlan`` whose fetch callback returns the log dataframe and
        the provenance extras.
    """
    from austrata.infrastructure.feature_mapper import log_features_to_dataframe

    eno_list = sorted(set(map(int, enos)))

    def _fetch_logs():
        # Pick the adapter method for this log kind; reject anything else.
        if kind == "stratigraphy":
            fetcher = source.fetch_stratigraphy
        elif kind == "earth_material":
            fetcher = source.fetch_earth_material
        else:
            raise ValueError(f"Unknown log kind {kind!r}.")

        frame = log_features_to_dataframe(fetcher(eno_list))
        extras = {
            "citation": WFS_CITATION,
            "license": WFS_LICENSE,
            "source_url": WFS_SOURCE_URL,
            "service_version": WFS_SERVICE_VERSION,
        }
        return frame, extras

    query_description = {
        "service": WFS_SERVICE,
        "layer": f"logs:{kind}",
        "enos": eno_list,
    }
    return FetchPlan(fetch_fn=_fetch_logs, query=query_description)
114
+
115
+
116
+ def _short_hash(text: str) -> str:
117
+ import hashlib
118
+
119
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]
@@ -0,0 +1,93 @@
1
+ """Use case: build the cache fetch-plan for hydrogeology polygons (ArcGIS).
2
+
3
+ The ArcGIS backend emits an ETag, so freshness uses a conditional probe: send
4
+ ``If-None-Match`` with the stored etag and treat a 304 as fresh. The probe is a
5
+ cheap ``returnCountOnly`` query the adapter owns (``probe_etag``), so this module
6
+ stays HTTP-free and the cache stays backend-agnostic.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from typing import Optional, Protocol
11
+
12
+ from austrata.domain.region import Region
13
+ from austrata.infrastructure.dataset_cache import FetchPlan
14
+ from austrata.infrastructure.feature_mapper import hydrogeology_features_to_gdf
15
+
16
# Backend identity: hashed into cache keys and recorded in each plan's query.
ARCGIS_SERVICE = "ga-hydrogeology-arcgis"
ARCGIS_LAYER = "Hydrogeology_of_Australia/0"

# Provenance fields returned alongside the data by the fetch function below.
ARCGIS_CITATION = (
    "Hydrogeology of Australia, Geoscience Australia, "
    "accessed via the Hydrogeology_of_Australia ArcGIS MapServer (layer 0)."
)
ARCGIS_LICENSE = "CC BY 4.0"
ARCGIS_SOURCE_URL = (
    "https://services.ga.gov.au/gis/rest/services"
    "/Hydrogeology_of_Australia/MapServer"
)
ARCGIS_SERVICE_VERSION = "ArcGIS REST MapServer"
28
+
29
+
30
class HydrogeologyProbeSource(Protocol):
    """Structural type of the source ``build_hydrogeology_plan`` consumes.

    Any object with these two methods satisfies it: one to fetch the
    features and one to run the ETag probe used for revalidation.
    """

    def fetch_units(
        self, region: Region, where: Optional[str] = None
    ) -> list:
        """Return the features for ``region``, optionally filtered by ``where``."""
        ...

    def probe_etag(
        self,
        region: Region,
        where: Optional[str] = None,
        etag: Optional[str] = None,
    ) -> dict:
        """Probe the service for ``region``/``where``, optionally with ``etag``.

        The plan reads the ``etag`` and ``not_modified`` keys of the result.
        """
        ...
36
+
37
+
38
def hydrogeology_cache_key(region: Region, where: Optional[str]) -> str:
    """Return the cache key identifying one hydrogeology query.

    The key is the region's own cache key, a ``-``, and a short hash of the
    service name, layer name and ``where`` clause. ``None`` and the empty
    string hash identically, so both mean "no filter".
    """
    where_part = where if where else ""
    query_identity = f"{ARCGIS_SERVICE}|{ARCGIS_LAYER}|{where_part}"
    region_part = region.cache_key()
    return f"{region_part}-{_short_hash(query_identity)}"
42
+
43
+
44
def build_hydrogeology_plan(
    source: HydrogeologyProbeSource, region: Region, where: Optional[str]
) -> FetchPlan:
    """Build the FetchPlan used to fetch and revalidate hydrogeology polygons.

    Args:
        source: Adapter providing ``fetch_units`` and ``probe_etag``.
        region: Region the polygons are requested for.
        where: Optional filter passed unchanged to the adapter.

    Returns:
        A ``FetchPlan`` carrying the fetch, fingerprint, comparison and
        conditional-header callbacks plus a description of the query.
    """
    # Describe the query up front; this reads the region geometry once, at
    # plan-build time.
    query_description = {
        "service": ARCGIS_SERVICE,
        "layer": ARCGIS_LAYER,
        "region_wkt": region.geometry.wkt,
        "filter": where,
    }

    def _if_none_match(stored: dict) -> dict:
        # Only offer a conditional header when an etag was actually stored.
        stored_etag = stored.get("etag")
        if not stored_etag:
            return {}
        return {"If-None-Match": stored_etag}

    def _probe(stored: dict) -> dict:
        # Replay the stored etag so the adapter can report "not modified";
        # otherwise the probe result carries the current etag.
        return source.probe_etag(region, where, etag=stored.get("etag"))

    def _is_unchanged(stored: dict, current: dict) -> bool:
        if current.get("not_modified"):
            return True
        live_etag = current.get("etag")
        if live_etag is None:
            # Without a live etag there is nothing to compare against.
            return False
        return live_etag == stored.get("etag")

    def _fetch_units():
        features = source.fetch_units(region, where)
        frame = hydrogeology_features_to_gdf(features)
        # Probe unconditionally after the fetch to learn the etag to store.
        probe_result = source.probe_etag(region, where)
        extras = {
            "etag": probe_result.get("etag"),
            "citation": ARCGIS_CITATION,
            "license": ARCGIS_LICENSE,
            "source_url": ARCGIS_SOURCE_URL,
            "service_version": ARCGIS_SERVICE_VERSION,
        }
        return frame, extras

    return FetchPlan(
        fetch_fn=_fetch_units,
        fingerprint_fn=_probe,
        unchanged_fn=_is_unchanged,
        conditional_headers_fn=_if_none_match,
        query=query_description,
    )
88
+
89
+
90
+ def _short_hash(text: str) -> str:
91
+ import hashlib
92
+
93
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()[:12]