aihydro-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. aihydro_core-0.1.0/PKG-INFO +46 -0
  2. aihydro_core-0.1.0/README.md +22 -0
  3. aihydro_core-0.1.0/aihydro_core/__init__.py +17 -0
  4. aihydro_core-0.1.0/aihydro_core/features/__init__.py +5 -0
  5. aihydro_core-0.1.0/aihydro_core/features/compute.py +254 -0
  6. aihydro_core-0.1.0/aihydro_core/features/registry.py +183 -0
  7. aihydro_core-0.1.0/aihydro_core/jobs/__init__.py +375 -0
  8. aihydro_core-0.1.0/aihydro_core/primitives/__init__.py +13 -0
  9. aihydro_core-0.1.0/aihydro_core/primitives/errors.py +58 -0
  10. aihydro_core-0.1.0/aihydro_core/primitives/geometry.py +80 -0
  11. aihydro_core-0.1.0/aihydro_core/primitives/hashing.py +46 -0
  12. aihydro_core-0.1.0/aihydro_core/primitives/provenance.py +64 -0
  13. aihydro_core-0.1.0/aihydro_core/store/__init__.py +4 -0
  14. aihydro_core-0.1.0/aihydro_core/store/memory.py +97 -0
  15. aihydro_core-0.1.0/aihydro_core/store/protocol.py +120 -0
  16. aihydro_core-0.1.0/aihydro_core.egg-info/PKG-INFO +46 -0
  17. aihydro_core-0.1.0/aihydro_core.egg-info/SOURCES.txt +26 -0
  18. aihydro_core-0.1.0/aihydro_core.egg-info/dependency_links.txt +1 -0
  19. aihydro_core-0.1.0/aihydro_core.egg-info/requires.txt +5 -0
  20. aihydro_core-0.1.0/aihydro_core.egg-info/top_level.txt +1 -0
  21. aihydro_core-0.1.0/pyproject.toml +65 -0
  22. aihydro_core-0.1.0/setup.cfg +4 -0
  23. aihydro_core-0.1.0/tests/test_feature_tool.py +383 -0
  24. aihydro_core-0.1.0/tests/test_features.py +140 -0
  25. aihydro_core-0.1.0/tests/test_jobs_hook.py +72 -0
  26. aihydro_core-0.1.0/tests/test_layering.py +69 -0
  27. aihydro_core-0.1.0/tests/test_primitives.py +84 -0
  28. aihydro_core-0.1.0/tests/test_store.py +108 -0
@@ -0,0 +1,46 @@
1
+ Metadata-Version: 2.4
2
+ Name: aihydro-core
3
+ Version: 0.1.0
4
+ Summary: AI-Hydro robustness substrate: hashing, provenance, Store protocol, async jobs, feature addressing
5
+ Author-email: Mohammad Galib <mgalib@purdue.edu>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/AI-Hydro/aihydro-core
8
+ Project-URL: Repository, https://github.com/AI-Hydro/aihydro-core
9
+ Keywords: hydrology,agentic,MCP,SDK,robustness
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: Apache Software License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.9
19
+ Description-Content-Type: text/markdown
20
+ Provides-Extra: dev
21
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
22
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
23
+ Requires-Dist: import-linter>=2.0; extra == "dev"
24
+
25
+ # aihydro-core
26
+
27
+ Robustness substrate for the AI-Hydro ecosystem.
28
+
29
+ Provides the single hashing and provenance vocabulary (`content_hash`, `param_hash`,
30
+ `ProvenanceRecord`) shared across all AI-Hydro packages, plus the `Store` protocol and
31
+ `AsyncJobRegistry` used by higher-level layers.
32
+
33
+ **Zero heavy dependencies** — pure Python stdlib. Designed to be the lowest layer in the
34
+ stack so it can be safely depended on by any AI-Hydro package without pulling in numpy,
35
+ pandas, or geo libraries.
36
+
37
+ ## Install
38
+
39
+ ```bash
40
+ pip install aihydro-core
41
+ ```
42
+
43
+ ## Part of the AI-Hydro ecosystem
44
+
45
+ - [aihydro-data](https://github.com/AI-Hydro/AIhydro-data) — global hydrology dataverse
46
+ - [AI-Hydro](https://github.com/AI-Hydro/AI-Hydro) — AI-native hydrologic modelling platform
@@ -0,0 +1,22 @@
1
+ # aihydro-core
2
+
3
+ Robustness substrate for the AI-Hydro ecosystem.
4
+
5
+ Provides the single hashing and provenance vocabulary (`content_hash`, `param_hash`,
6
+ `ProvenanceRecord`) shared across all AI-Hydro packages, plus the `Store` protocol and
7
+ `AsyncJobRegistry` used by higher-level layers.
8
+
9
+ **Zero heavy dependencies** — pure Python stdlib. Designed to be the lowest layer in the
10
+ stack so it can be safely depended on by any AI-Hydro package without pulling in numpy,
11
+ pandas, or geo libraries.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pip install aihydro-core
17
+ ```
18
+
19
+ ## Part of the AI-Hydro ecosystem
20
+
21
+ - [aihydro-data](https://github.com/AI-Hydro/AIhydro-data) — global hydrology dataverse
22
+ - [AI-Hydro](https://github.com/AI-Hydro/AI-Hydro) — AI-native hydrologic modelling platform
@@ -0,0 +1,17 @@
1
+ """
2
+ aihydro-core — the robustness substrate for AI-Hydro tools.
3
+
4
+ Four cross-cutting blocks, each solving one axis of tool reliability:
5
+
6
+ primitives — hashing, provenance (Artifact), errors (ToolError)
7
+ store — Store protocol (persistence-agnostic keyed result + feature store)
8
+ jobs — async execution (start/status/result/cancel/list + PID registry)
9
+ features — geometry addressing (Feature registry + @feature_tool decorator)
10
+
11
+ Domain packages (aihydro-tools, aihydro-data) depend on this package.
12
+ This package has zero heavy dependencies — it is stdlib only.
13
+
14
+ See AIHYDRO_CORE_DESIGN.md (local-docs/) for the full architecture.
15
+ """
16
+
17
+ __version__ = "0.1.0"
@@ -0,0 +1,5 @@
1
+ from .registry import FeatureRegistry
2
+ from ..primitives.geometry import Feature
3
+ from ..primitives.errors import FeatureNotFoundError
4
+
5
+ __all__ = ["FeatureRegistry", "Feature", "FeatureNotFoundError"]
@@ -0,0 +1,254 @@
1
+ """
2
+ feature_compute — the @feature_tool decorator.
3
+
4
+ Every spatial tool that is addressable by geometry composes from this block.
5
+
6
+ Usage
7
+ -----
8
+ Authors write a **pure sync kernel** that receives a bare GeoJSON geometry dict
9
+ and returns a result envelope. The decorator handles:
10
+
11
+ resolve → cache check → compute → store → provenance → commit
12
+
13
+ Example::
14
+
15
+ from aihydro_core.features.compute import feature_tool
16
+
17
+ @feature_tool(product="twi", citations=["usgs_3dep", "copernicus_glo30"])
18
+ def _twi_stats_kernel(geom: dict, *, resolution: int = 30) -> dict:
19
+ # geom is already resolved — pure computation only.
20
+ # (The domain kernel is imported by the *caller's* package, never here:
21
+ # core depends on nobody.)
22
+ return compute_twi_stats(geom, resolution=resolution)
23
+
24
+ # Domain-side thin wrapper (in the tools package, not core):
25
+ store = load_store(store_id)
26
+ result = _twi_stats_kernel(store=store, feature=feature_ref, resolution=30)
27
+
28
+ Wrapped function signature
29
+ --------------------------
30
+ fn_wrapped(store: Store, feature: str | dict | list | None = None, **params) -> dict
31
+
32
+ Parameters
33
+ ~~~~~~~~~~
34
+ store : Store
35
+ A loaded Store instance (typically a HydroSession). The caller is
36
+ responsible for loading and passing it. Core never imports HydroSession.
37
+ feature : str | dict | list | None
38
+ Feature reference — resolved via FeatureRegistry. str → id/name lookup;
39
+ dict → inline GeoJSON (registered on the fly); list → batch fan-out; None → active feature.
40
+ **params
41
+ Compute parameters forwarded to the wrapped kernel unchanged. These form
42
+ the params_key for the three-level cache.
43
+
44
+ Result
45
+ ------
46
+ The result dict from the kernel (or a cached envelope on hit).
47
+ Always includes ``feature_id`` and ``_cache_hit`` keys.
48
+
49
+ Cache key
50
+ ---------
51
+ ``param_hash(params)`` — deterministic SHA-256 hex of the sorted JSON params.
52
+ Same feature + same params → hit. Different feature → miss (same params key, but a different feature_id).
53
+ """
54
+ from __future__ import annotations
55
+
56
+ import functools
57
+ import logging
58
+ from datetime import datetime, timezone
59
+ from typing import TYPE_CHECKING, Any, Callable, TypeVar
60
+
61
+ from ..primitives.hashing import param_hash, content_hash
62
+ from ..features.registry import FeatureRegistry
63
+
64
+ if TYPE_CHECKING:
65
+ from ..store.protocol import Store
66
+
67
+ log = logging.getLogger("aihydro_core.features.compute")
68
+
69
+ F = TypeVar("F", bound=Callable[..., dict])
70
+
71
+
72
+ def _now_iso() -> str:
73
+ return datetime.now(timezone.utc).isoformat()
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # @feature_tool decorator
78
+ # ---------------------------------------------------------------------------
79
+
80
def feature_tool(
    product: str,
    citations: list[str] | None = None,
) -> Callable[[F], "FeatureToolWrapper"]:
    """
    Decorator factory that wraps a pure sync spatial kernel.

    Parameters
    ----------
    product : str
        The result product name — e.g. "twi", "cn", "signatures".
        Used as the top-level key in the three-level result store.
    citations : list[str] | None
        Citation keys (data source identifiers). They are passed to
        ``store.add_citations`` after every fresh (non-cached) computation.

    Returns
    -------
    A decorator that wraps ``fn(geom: dict, **params) -> dict``
    into ``fn_wrapped(store, feature=None, **params) -> dict``.
    """
    # Copy the list so that later mutation of the caller's list cannot change
    # what this tool records.
    _citations: list[str] = list(citations) if citations else []

    def decorator(fn: F) -> "FeatureToolWrapper":

        @functools.wraps(fn)
        def wrapper(
            store: "Store",
            feature: "str | dict | list | None" = None,
            **params: Any,
        ) -> dict:
            """
            Resolve → cache check → compute → store → provenance → commit.

            Parameters
            ----------
            store : Store
                Loaded Store instance (e.g. HydroSession).
            feature : str | dict | list | None
                Feature reference. None → active/single feature.
                **list** → batch fan-out: each element resolved independently;
                returns ``{"batch": True, "n_features": N, "results": {...}}``.
            **params
                Forwarded to the kernel; also form the cache key.

            Returns
            -------
            dict
                For a single feature: the ``{data, meta}`` envelope plus the
                top-level keys ``feature_id`` and ``_cache_hit`` (True when the
                envelope came from the store, False when it was just computed).
                For a list: a batch summary dict (see above); failures of
                individual refs are collected under ``errors`` instead of
                being raised.
            """
            # C3: batch fan-out — feature is a list of refs
            if isinstance(feature, list):
                results: dict[str, dict] = {}
                errors: dict[str, str] = {}
                for ref in feature:
                    try:
                        r = wrapper(store, feature=ref, **params)
                        results[r.get("feature_id", str(ref))] = r
                    except Exception as exc:
                        # One bad ref must not abort the rest of the batch.
                        fid = str(ref)
                        errors[fid] = str(exc)
                        log.warning(
                            "Batch feature_tool %s failed for ref=%r: %s",
                            product, ref, exc,
                        )
                return {
                    "batch": True,
                    "product": product,
                    "n_features": len(feature),
                    "n_success": len(results),
                    "n_error": len(errors),
                    "results": results,
                    **({"errors": errors} if errors else {}),
                }

            # 1. Resolve feature ref → Feature
            registry = FeatureRegistry(store)
            feat = registry.resolve(feature)

            # 2. Cache check.  ``params`` is always a dict (possibly empty), so
            #    it can be hashed directly.
            key = param_hash(params)
            cached = store.get_result(product, feat.feature_id, key)
            if cached is not None:
                log.debug(
                    "Cache hit: product=%s feature_id=%s key=%s",
                    product, feat.feature_id, key,
                )
                return {**cached, "feature_id": feat.feature_id, "_cache_hit": True}

            # 3. Compute
            log.debug(
                "Cache miss — computing: product=%s feature_id=%s key=%s",
                product, feat.feature_id, key,
            )
            raw = fn(feat.geojson, **params)

            # 4. Normalise result into {data, meta} envelope
            if isinstance(raw, dict) and "data" in raw and "meta" in raw:
                # Kernel already returned an envelope: keep its meta and
                # overlay the fields this decorator is responsible for.
                envelope: dict = dict(raw)
                envelope["meta"] = {
                    **raw["meta"],
                    "computed_at": raw["meta"].get("computed_at") or _now_iso(),
                    "feature_id": feat.feature_id,
                    "feature_name": feat.name,
                    "params": params,
                }
            else:
                # Kernel returned bare data dict — wrap it
                envelope = {
                    "data": raw,
                    "meta": {
                        "computed_at": _now_iso(),
                        "tool": fn.__name__,
                        "feature_id": feat.feature_id,
                        "feature_name": feat.name,
                        "params": params,
                    },
                }

            # 5. Store result
            store.put_result(product, feat.feature_id, key, envelope)

            # 6. Provenance
            if _citations:
                store.add_citations(_citations)

            try:
                from ..primitives.provenance import Artifact as _Artifact
                art = _Artifact(
                    artifact_id=f"{product}_{feat.feature_id}_{key[:8]}",
                    type="derived",
                    source=product,
                    params=params,
                    param_hash=key,
                    content_hash=content_hash(envelope.get("data", {})),
                )
                store.store_artifact(art)
            except Exception as _e:
                # Best-effort: a provenance failure must not lose the result.
                log.debug("Provenance artifact recording failed (non-fatal): %s", _e)

            # 7. Commit
            store.commit()

            # The stored envelope stays free of transport keys; they are added
            # only to the returned copy.  ``_cache_hit`` is set on the miss
            # path too, so callers can always read it.
            return {**envelope, "feature_id": feat.feature_id, "_cache_hit": False}

        # Attach metadata so callers can introspect decoration intent
        wrapper._feature_tool_product = product  # type: ignore[attr-defined]
        wrapper._feature_tool_citations = _citations  # type: ignore[attr-defined]
        wrapper._feature_tool_inner = fn  # type: ignore[attr-defined]

        return wrapper  # type: ignore[return-value]

    return decorator
229
+
230
+
231
+ # ---------------------------------------------------------------------------
232
+ # FeatureToolWrapper typing stub (for documentation only)
233
+ # ---------------------------------------------------------------------------
234
+
235
class FeatureToolWrapper:
    """
    Callable returned by @feature_tool.

    Not instantiated directly — used only as a type annotation target so IDE
    tooling can show the wrapper's signature.

    ``feature`` may be a str, a dict, a list of refs (batch fan-out) or None,
    matching what the wrapper built by ``feature_tool`` accepts.
    """
    # Product name given to @feature_tool.
    _feature_tool_product: str
    # Citation keys given to @feature_tool.
    _feature_tool_citations: list[str]
    # The undecorated kernel function.
    _feature_tool_inner: Callable[..., dict]

    def __call__(
        self,
        store: "Store",
        feature: "str | dict | list | None" = None,
        **params: Any,
    ) -> dict: ...
252
+
253
+
254
+ __all__ = ["feature_tool", "FeatureToolWrapper"]
@@ -0,0 +1,183 @@
1
+ """
2
+ FeatureRegistry — resolve geometry references to Feature objects.
3
+
4
+ This is the agent-facing geometry-addressing layer. An agent passes a short
5
+ string id (or name), not a raw GeoJSON blob. The registry resolves that ref
6
+ to a fully-loaded Feature so the compute kernel receives a clean geometry.
7
+
8
+ Resolution chain (resolve(ref)):
9
+ str → id lookup → name lookup → GeoJSON-string parse → register-on-fly
10
+ dict → treat as GeoJSON, register on the fly
11
+ None → active feature (preserves single-watershed back-compat)
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import re
17
+ import uuid
18
+ from typing import TYPE_CHECKING
19
+
20
+ from ..primitives.geometry import Feature
21
+ from ..primitives.errors import FeatureNotFoundError
22
+
23
+ if TYPE_CHECKING:
24
+ from ..store.protocol import Store
25
+
26
+
27
+ def _slugify(s: str) -> str:
28
+ s = re.sub(r"[^a-zA-Z0-9]+", "-", s).strip("-").lower()
29
+ return (s[:32] or uuid.uuid4().hex[:12])
30
+
31
+
32
+ def _extract_geometry(raw: dict) -> dict:
33
+ """Return the bare geometry dict from either a GeoJSON Feature or bare geometry."""
34
+ if raw.get("type") == "Feature":
35
+ return raw.get("geometry", raw)
36
+ return raw
37
+
38
+
39
class FeatureRegistry:
    """
    Geometry registry backed by a Store.

    Every spatial tool receives a FeatureRegistry built from the session's Store.
    The registry is the single resolution authority — tools never read geometries
    directly from the session.
    """

    def __init__(self, store: Store) -> None:
        self._store = store

    # ------------------------------------------------------------------ #
    #  Registration                                                        #
    # ------------------------------------------------------------------ #

    def register(
        self,
        geojson: dict | str,
        name: str = "",
        source: str = "",
        feature_id: str | None = None,
        set_active: bool = False,
        area_km2: float | None = None,
        bbox: tuple[float, float, float, float] | None = None,
    ) -> Feature:
        """
        Register a geometry and return the Feature with its stable id.

        Parameters
        ----------
        geojson : dict | str
            GeoJSON dict or JSON string. Accepts bare geometry or GeoJSON Feature.
        name : str
            Human-readable label. Used as the display name and as a lookup alias.
        source : str
            Registration source tag ("delineate_watershed", "map_annotation", ...).
        feature_id : str | None
            Explicit id to assign. If None, derived from name (slugified) or UUID.
        set_active : bool
            If True, mark this feature as the active (default) feature.
        area_km2 : float | None
            Stored on the Feature as given.
        bbox : tuple[float, float, float, float] | None
            Stored on the Feature as given.

        Raises
        ------
        ValueError
            If *geojson* is a string that is not valid JSON, or if the
            (decoded) value is not a JSON object / dict.
        """
        if isinstance(geojson, str):
            try:
                geojson = json.loads(geojson)
            except json.JSONDecodeError as e:
                raise ValueError(f"geojson is not valid JSON: {e}") from e

        # Valid JSON can still be a list, number, etc.  Reject those here
        # rather than failing later with an AttributeError.
        if not isinstance(geojson, dict):
            raise ValueError(
                f"geojson must be a GeoJSON object, got {type(geojson).__name__}"
            )

        geom = _extract_geometry(geojson)

        if feature_id is None:
            feature_id = _slugify(name) if name else uuid.uuid4().hex[:12]

        feature = Feature(
            feature_id=feature_id,
            geojson=geom,
            name=name,
            source=source,
            bbox=bbox,
            area_km2=area_km2,
        )
        self._store.put_feature(feature)
        if set_active:
            self._store.set_active_feature_id(feature_id)
        self._store.commit()
        return feature

    # ------------------------------------------------------------------ #
    #  Resolution                                                          #
    # ------------------------------------------------------------------ #

    def resolve(self, ref: str | dict | None) -> Feature:
        """
        Resolve a reference to a Feature.

        ref=None  → active feature (single-watershed back-compat)
        ref=str   → id lookup, then name lookup, then GeoJSON parse
        ref=dict  → treat as raw GeoJSON, register on the fly

        Raises FeatureNotFoundError with the list of available ids if ref
        cannot be resolved — so the error itself tells the agent what to do.

        NOTE(review): inline GeoJSON (dict or JSON string) is registered with
        no name, so it gets a fresh random id on every call; resolving the same
        inline geometry twice yields two distinct features.
        """
        if ref is None:
            return self._resolve_active()

        if isinstance(ref, dict):
            return self.register(ref, source="on-the-fly")

        # --- string: id → name → GeoJSON parse → not found ---
        feat = self._store.get_feature(ref)
        if feat is not None:
            return feat

        # Fetch the listing once; it serves both the name lookup and the
        # "available" hint in the error below.
        features = self._store.list_features()
        for f in features:
            if f.name and f.name == ref:
                return f

        try:
            parsed = json.loads(ref)
            if isinstance(parsed, dict):
                return self.register(parsed, source="on-the-fly")
        except ValueError:
            # json.JSONDecodeError is a subclass of ValueError; either way the
            # ref is reported as not found.
            pass

        available = [f.feature_id for f in features]
        raise FeatureNotFoundError(ref=ref, available=available)

    def resolve_many(self, refs: list[str | dict]) -> list[Feature]:
        """Resolve a list of refs; raises FeatureNotFoundError on first miss."""
        return [self.resolve(r) for r in refs]

    def _resolve_active(self) -> Feature:
        """
        Return the active feature, or the only registered feature as a fallback.

        Raises FeatureNotFoundError (ref ``"<active>"``) when no usable active
        id is set and the store does not hold exactly one feature.
        """
        active_id = self._store.get_active_feature_id()
        if active_id:
            feat = self._store.get_feature(active_id)
            if feat is not None:
                return feat
        # No usable active id: fall back to the sole registered feature, but
        # only when exactly one exists (otherwise the choice is ambiguous).
        features = self._store.list_features()
        if len(features) == 1:
            return features[0]
        raise FeatureNotFoundError(
            ref="<active>",
            available=[f.feature_id for f in features] if features else None,
        )

    # ------------------------------------------------------------------ #
    #  Listing + active                                                    #
    # ------------------------------------------------------------------ #

    def list(self) -> list[Feature]:
        """Return all registered features."""
        return self._store.list_features()

    def set_active(self, feature_id: str) -> None:
        """
        Set the active (default) feature by id.

        Raises FeatureNotFoundError if no feature with that id is stored.
        """
        feat = self._store.get_feature(feature_id)
        if feat is None:
            available = [f.feature_id for f in self._store.list_features()]
            raise FeatureNotFoundError(ref=feature_id, available=available)
        self._store.set_active_feature_id(feature_id)
        self._store.commit()

    def get_active_id(self) -> str | None:
        """Return the store's active feature id (whatever the store reports)."""
        return self._store.get_active_feature_id()