isku 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
isku-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,186 @@
1
+ Metadata-Version: 2.3
2
+ Name: isku
3
+ Version: 0.1.0
4
+ Summary: Minimalist Python + xarray-based climate impact/damage projection framework for researchers with little time.
5
+ Author: Brewster Malevich
6
+ Author-email: Brewster Malevich <bmalevich@rhg.com>
7
+ Requires-Dist: xarray>=2026.4.0
8
+ Requires-Python: >=3.14
9
+ Description-Content-Type: text/markdown
10
+
11
+ # isku
12
+
13
+ Minimalist Python + xarray-based climate impact/damage projection framework for researchers with little time.
14
+
15
+ > [!CAUTION]
16
+ > This is a prototype. It is likely to change in breaking ways. It might delete all your data. Don't use it in production.
17
+
18
+ ## Features
19
+
20
+ * Define and apply three-step models to project climate effects, impacts, and damages.
21
+
22
+ * Extract regionalized variables from regularly gridded data, such as downscaled general circulation model output.
23
+
24
+ * Minimalist.
25
+
26
+ * Loosely coupled components and protocols for quick scripts with functions or gnarly OOP-heavy applications.
27
+
28
+ * Designed around `xarray.Dataset` to work with larger-than-memory datasets and distributed computing (dask!), GPUs, TPUs, streaming datasets.
29
+
30
+ * Great for weird ad hoc projects and researchers that love rechunking big data!
31
+
32
+ ## Example
33
+
34
+ ### Projection
35
+
36
+ Projecting data with a model in `isku` is similar to the preprocess/predict/postprocess workflow you might already be familiar with.
37
+
38
+ In `isku`, we could do a linear model with pre/post-processing like:
39
+
40
+ ```python
41
+ import isku
42
+
43
+ import numpy as np
44
+ import xarray as xr
45
+
46
+ # Some toy input data to work with.
47
+ input_data = xr.Dataset(
48
+ {
49
+ "coef": (["region"], [0, 0, 0]),
50
+ "tas": (["region"], [1, 2, 3]),
51
+ }
52
+ )
53
+
54
+ # Define a basic workflow for the projection model, pre/post-processing steps.
55
+ def _preprocess(ds):
56
+ my_coef = ds["coef"] + 1
57
+ my_tas = ds["tas"]
58
+ return xr.Dataset({"coef": my_coef, "tas": my_tas})
59
+
60
+
61
+ def _linear_impact_model(ds):
62
+ y = ds["coef"] * 2 + ds["tas"]
63
+ return xr.Dataset({"impact": y})
64
+
65
+
66
+ def _postprocess(ds):
67
+ return ds[["impact"]] + 10
68
+
69
+
70
+ test_impact_model = isku.build_projection_workflow(
71
+ pre=_preprocess,
72
+ project=_linear_impact_model,
73
+ post=_postprocess,
74
+ )
75
+
76
+ # Put it together and run the projection.
77
+ projected = isku.project(input_data, model=test_impact_model)
78
+ ```
79
+
80
+ This example uses pure functions to define workflow steps. This can be useful for quick analysis but `isku` also accepts
81
+ custom objects adhering to the select protocols. The intent is that components can be quickly used, ignored, extended or
82
+ replaced as needed by a project.
83
+
84
+ ### Extracting regions
85
+
86
+ The relationship between data transformations and region extraction can be complex in impact and damage research.
87
+
88
+ Say you have temperature data on a regular latitude-longitude grid. You need to extract regions from this grid, e.g.
89
+ political boundaries, but you need to weight each temperature grid point by the proportion of the region's population
90
+ exposed to temperature within each region. To make matters more complex, you likely need to be specific about additional processing and transformations
91
+ before and after regionalization. This is a niche case but a common headache.
92
+
93
+ We can handle this type of transformation in `isku` like:
94
+
95
+ ```python
96
+ import isku
97
+
98
+ import numpy as np
99
+ import xarray as xr
100
+
101
+
102
+ # Define some toy data to transform and regionalize.
103
+ gridded_data = xr.DataArray(
104
+ np.arange(25).reshape([5, 5]),
105
+ dims=("lon", "lat"),
106
+ coords={
107
+ "lon": np.arange(5),
108
+ "lat": np.arange(5),
109
+ },
110
+ name="variable1",
111
+ ).to_dataset()
112
+
113
+ # Define regions and how they weight each grid point in the gridded data.
114
+ # This is usually read from file, but we're making up a quick example dataset.
115
+ my_regions = isku.GridWeightingRegions(
116
+ xr.Dataset(
117
+ {
118
+ "region": (["idx"], ["a", "a", "a", "b"]),
119
+ "weight": (["idx"], [0.3, 0.3, 0.3, 1.0]),
120
+ "lon": (["idx"], [2, 3, 4, 1]),
121
+ "lat": (["idx"], [0, 0, 0, 2]),
122
+ },
123
+ )
124
+ )
125
+
126
+ # Define workflow with pre/post regionalization transformations.
127
+ def _add_one(ds):
128
+ return ds[["variable1"]] + 1
129
+
130
+
131
+ def _add_ten(ds):
132
+ return ds[["variable1"]] + 10
133
+
134
+
135
+ my_extraction_workflow = isku.build_extraction_workflow(
136
+ pre=_add_one, # Before regionalization.
137
+ post=_add_ten, # After regionalization.
138
+ )
139
+
140
+
141
+ # Put it all together to extract regions from gridded data.
142
+ transformed = isku.extract_regions(
143
+ gridded_data,
144
+ workflow=my_extraction_workflow,
145
+ regions=my_regions,
146
+ )
147
+ ```
148
+
149
+
150
+ ## Installation
151
+
152
+ Using `pip` you can install this package with
153
+
154
+ ```
155
+ pip install isku
156
+ ```
157
+
158
+ for a `uv` project this is
159
+
160
+ ```
161
+ uv add isku
162
+ ```
163
+
164
+ Install the unreleased and unstable bleeding-edge version of the package with:
165
+
166
+ ```shell
167
+ pip install git+https://github.com/brews/isku
168
+ ```
169
+
170
+ using `pip`. With a `uv` project, do
171
+
172
+ ```shell
173
+ uv add git+https://github.com/brews/isku
174
+ ```
175
+
176
+ ## Is this any good?
177
+
178
+ Yes.
179
+
180
+ ## Support
181
+
182
+ `isku` is open-source software made available under the terms of either the MIT License or the Apache License 2.0, at your option.
183
+
184
+ See CONTRIBUTING.md if you would like to contribute.
185
+
186
+ Changes for each release are summarized in CHANGELOG.md.
isku-0.1.0/README.md ADDED
@@ -0,0 +1,176 @@
1
+ # isku
2
+
3
+ Minimalist Python + xarray-based climate impact/damage projection framework for researchers with little time.
4
+
5
+ > [!CAUTION]
6
+ > This is a prototype. It is likely to change in breaking ways. It might delete all your data. Don't use it in production.
7
+
8
+ ## Features
9
+
10
+ * Define and apply three-step models to project climate effects, impacts, and damages.
11
+
12
+ * Extract regionalized variables from regularly gridded data, such as downscaled general circulation model output.
13
+
14
+ * Minimalist.
15
+
16
+ * Loosely coupled components and protocols for quick scripts with functions or gnarly OOP-heavy applications.
17
+
18
+ * Designed around `xarray.Dataset` to work with larger-than-memory datasets and distributed computing (dask!), GPUs, TPUs, streaming datasets.
19
+
20
+ * Great for weird ad hoc projects and researchers that love rechunking big data!
21
+
22
+ ## Example
23
+
24
+ ### Projection
25
+
26
+ Projecting data with a model in `isku` is similar to the preprocess/predict/postprocess workflow you might already be familiar with.
27
+
28
+ In `isku`, we could do a linear model with pre/post-processing like:
29
+
30
+ ```python
31
+ import isku
32
+
33
+ import numpy as np
34
+ import xarray as xr
35
+
36
+ # Some toy input data to work with.
37
+ input_data = xr.Dataset(
38
+ {
39
+ "coef": (["region"], [0, 0, 0]),
40
+ "tas": (["region"], [1, 2, 3]),
41
+ }
42
+ )
43
+
44
+ # Define a basic workflow for the projection model, pre/post-processing steps.
45
+ def _preprocess(ds):
46
+ my_coef = ds["coef"] + 1
47
+ my_tas = ds["tas"]
48
+ return xr.Dataset({"coef": my_coef, "tas": my_tas})
49
+
50
+
51
+ def _linear_impact_model(ds):
52
+ y = ds["coef"] * 2 + ds["tas"]
53
+ return xr.Dataset({"impact": y})
54
+
55
+
56
+ def _postprocess(ds):
57
+ return ds[["impact"]] + 10
58
+
59
+
60
+ test_impact_model = isku.build_projection_workflow(
61
+ pre=_preprocess,
62
+ project=_linear_impact_model,
63
+ post=_postprocess,
64
+ )
65
+
66
+ # Put it together and run the projection.
67
+ projected = isku.project(input_data, model=test_impact_model)
68
+ ```
69
+
70
+ This example uses pure functions to define workflow steps. This can be useful for quick analysis but `isku` also accepts
71
+ custom objects adhering to the select protocols. The intent is that components can be quickly used, ignored, extended or
72
+ replaced as needed by a project.
73
+
74
+ ### Extracting regions
75
+
76
+ The relationship between data transformations and region extraction can be complex in impact and damage research.
77
+
78
+ Say you have temperature data on a regular latitude-longitude grid. You need to extract regions from this grid, e.g.
79
+ political boundaries, but you need to weight each temperature grid point by the proportion of the region's population
80
+ exposed to temperature within each region. To make matters more complex, you likely need to be specific about additional processing and transformations
81
+ before and after regionalization. This is a niche case but a common headache.
82
+
83
+ We can handle this type of transformation in `isku` like:
84
+
85
+ ```python
86
+ import isku
87
+
88
+ import numpy as np
89
+ import xarray as xr
90
+
91
+
92
+ # Define some toy data to transform and regionalize.
93
+ gridded_data = xr.DataArray(
94
+ np.arange(25).reshape([5, 5]),
95
+ dims=("lon", "lat"),
96
+ coords={
97
+ "lon": np.arange(5),
98
+ "lat": np.arange(5),
99
+ },
100
+ name="variable1",
101
+ ).to_dataset()
102
+
103
+ # Define regions and how they weight each grid point in the gridded data.
104
+ # This is usually read from file, but we're making up a quick example dataset.
105
+ my_regions = isku.GridWeightingRegions(
106
+ xr.Dataset(
107
+ {
108
+ "region": (["idx"], ["a", "a", "a", "b"]),
109
+ "weight": (["idx"], [0.3, 0.3, 0.3, 1.0]),
110
+ "lon": (["idx"], [2, 3, 4, 1]),
111
+ "lat": (["idx"], [0, 0, 0, 2]),
112
+ },
113
+ )
114
+ )
115
+
116
+ # Define workflow with pre/post regionalization transformations.
117
+ def _add_one(ds):
118
+ return ds[["variable1"]] + 1
119
+
120
+
121
+ def _add_ten(ds):
122
+ return ds[["variable1"]] + 10
123
+
124
+
125
+ my_extraction_workflow = isku.build_extraction_workflow(
126
+ pre=_add_one, # Before regionalization.
127
+ post=_add_ten, # After regionalization.
128
+ )
129
+
130
+
131
+ # Put it all together to extract regions from gridded data.
132
+ transformed = isku.extract_regions(
133
+ gridded_data,
134
+ workflow=my_extraction_workflow,
135
+ regions=my_regions,
136
+ )
137
+ ```
138
+
139
+
140
+ ## Installation
141
+
142
+ Using `pip` you can install this package with
143
+
144
+ ```
145
+ pip install isku
146
+ ```
147
+
148
+ for a `uv` project this is
149
+
150
+ ```
151
+ uv add isku
152
+ ```
153
+
154
+ Install the unreleased and unstable bleeding-edge version of the package with:
155
+
156
+ ```shell
157
+ pip install git+https://github.com/brews/isku
158
+ ```
159
+
160
+ using `pip`. With a `uv` project, do
161
+
162
+ ```shell
163
+ uv add git+https://github.com/brews/isku
164
+ ```
165
+
166
+ ## Is this any good?
167
+
168
+ Yes.
169
+
170
+ ## Support
171
+
172
+ `isku` is open-source software made available under the terms of either the MIT License or the Apache License 2.0, at your option.
173
+
174
+ See CONTRIBUTING.md if you would like to contribute.
175
+
176
+ Changes for each release are summarized in CHANGELOG.md.
@@ -0,0 +1,26 @@
1
+ [project]
2
+ name = "isku"
3
+ version = "0.1.0"
4
+ description = "Minimalist Python + xarray-based climate impact/damage projection framework for researchers with little time."
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Brewster Malevich", email = "bmalevich@rhg.com" }
8
+ ]
9
+ requires-python = ">=3.14"
10
+ dependencies = [
11
+ "xarray>=2026.4.0",
12
+ ]
13
+
14
+ [build-system]
15
+ requires = ["uv_build>=0.11.11,<0.12.0"]
16
+ build-backend = "uv_build"
17
+
18
+ [dependency-groups]
19
+ dev = [
20
+ "numpy>=2.4.4",
21
+ "pytest>=9.0.3",
22
+ "pytest-cov>=7.1.0",
23
+ "ruff>=0.15.12",
24
+ "ty>=0.0.33",
25
+ "zensical>=0.0.40",
26
+ ]
@@ -0,0 +1,222 @@
1
+ from dataclasses import dataclass
2
+ from typing import Protocol, Callable
3
+
4
+ import xarray as xr
5
+
6
+
7
# Public API: the names exported by `from isku import *` and considered
# stable entry points for the package.
__all__ = [
    "build_extraction_workflow",
    "extract_regions",
    "build_projection_workflow",
    "project",
    "GridWeightingRegions",
    "ExtractionWorkflow",
    "RegionExtractor",
    "ProjectionWorkflow",
]
17
+
18
+
19
class ExtractionWorkflow(Protocol):
    """
    Template for pre and post region extraction transformation

    Implementations supply the two transformation hooks that ``extract_regions``
    applies around a RegionExtractor: ``pre_extract`` runs on the gridded input
    before regionalization and ``post_extract`` runs on the regionalized result.

    See Also
    --------
    build_extraction_workflow: Quickly build extraction workflow from functions for regionalization with pre/post transformations.
    extract_regions: Apply a workflow to extract a new regionalized dataset from gridded data.
    RegionExtractor: Protocol for regionalizing, or extracting regions from a dataset.
    """

    def pre_extract(self, ds: xr.Dataset) -> xr.Dataset:
        """
        Transform dataset before region extraction

        Parameters
        ----------
        ds : xr.Dataset
            Gridded input dataset, prior to regionalization.

        Returns
        -------
        xr.Dataset
            Transformed dataset to be handed to a RegionExtractor.
        """
        ...

    def post_extract(self, ds: xr.Dataset) -> xr.Dataset:
        """
        Transform dataset after region extraction

        Parameters
        ----------
        ds : xr.Dataset
            Regionalized dataset produced by a RegionExtractor.

        Returns
        -------
        xr.Dataset
            Final transformed, regionalized dataset.
        """
        ...
41
+
42
+
43
class RegionExtractor(Protocol):
    """
    Protocol for extracting regions from gridded data

    Implementations aggregate the points of a gridded dataset into a
    regionalized dataset (e.g. GridWeightingRegions, which weights grid points
    before summing within each region).

    See Also
    --------
    extract_regions: Apply a workflow to extract a new regionalized dataset from gridded data with pre/post transformations.
    ExtractionWorkflow: Technical protocol for a workflow with pre/post regionalization transformations.
    """

    def extract_regions(self, ds: xr.Dataset) -> xr.Dataset:
        """
        Extract and aggregate gridded dataset points into regionalized dataset

        Parameters
        ----------
        ds : xr.Dataset
            Gridded input dataset.

        Returns
        -------
        xr.Dataset
            Dataset aggregated by region.
        """
        ...
58
+
59
+
60
# This dataclass is a quick and simple way to get a concrete instance of the protocol.
@dataclass(frozen=True)
class _SimpleExtractionWorkflow(ExtractionWorkflow):
    # Plain callables stored as (frozen) instance attributes stand in for the
    # protocol's method names: `instance.pre_extract(ds)` invokes the stored
    # function directly, with no bound `self`.
    pre_extract: Callable[[xr.Dataset], xr.Dataset]
    post_extract: Callable[[xr.Dataset], xr.Dataset]
65
+
66
+
67
def build_extraction_workflow(
    *, pre: Callable[[xr.Dataset], xr.Dataset], post: Callable[[xr.Dataset], xr.Dataset]
) -> ExtractionWorkflow:
    """
    Build a workflow of transformation steps applied to input gridded data, pre/post regionalization, to create a derived variable as output

    This function is a quick and simple way to build an ExtractionWorkflow from two simple functions.

    These steps should be general. They may contain logic for sanity checks
    on inputs and outputs, calculating derived variables and climate indices,
    adding or checking metadata or units. Avoid including logic for cleaning,
    or harmonizing input data, especially if it is specific to a single
    project's usecase. Generally avoid using a single strategy to output
    multiple unrelated variables.

    Parameters
    ----------
    pre : Callable[[xr.Dataset], xr.Dataset]
        Transformation applied before regionalization.
    post : Callable[[xr.Dataset], xr.Dataset]
        Transformation applied after regionalization.

    Returns
    -------
    ExtractionWorkflow
        Workflow exposing 'pre' and 'post' as its pre/post extraction steps.

    See Also
    --------
    extract_regions: Apply a workflow to extract a new regionalized dataset from gridded data.
    ExtractionWorkflow: The underlying protocol for a workflow that extracts a regionalized dataset.
    """
    return _SimpleExtractionWorkflow(pre_extract=pre, post_extract=post)
89
+
90
+
91
# Use a class for grid weights because we're making assumptions/enforcements
# about the weight data's content and interactions (required variables,
# lat/lon alignment with the gridded input).
class GridWeightingRegions(RegionExtractor):
    """
    Regions that can be extracted from regularly-gridded data after weighting grid points

    'weights' dataset must have "lat", "lon", "weight", "region".

    Raises
    ------
    ValueError
        If 'weights' is missing "lat", "lon", "weight" or "region" variables.

    See Also
    --------
    extract_regions: Use GridWeightingRegions in a workflow to extract new regionalized dataset.
    build_extraction_workflow: Quickly build extraction workflow from functions for regionalization.
    RegionExtractor: Protocol for regionalizing, or extracting regions from a dataset.
    """

    def __init__(self, weights: xr.Dataset):
        """
        Parameters
        ----------
        weights : xr.Dataset
            Mapping from grid points to regions. Must contain "lat", "lon",
            "weight", and "region" variables; presumably these share a common
            index dimension so each entry describes one weighted grid point
            (as in the README example) — not enforced here.

        Raises
        ------
        ValueError
            If 'weights' is missing any of the required variables.
        """
        target_variables = ("lat", "lon", "weight", "region")
        missing_variables = [v for v in target_variables if v not in weights.variables]
        if missing_variables:
            raise ValueError(
                f"input weights is missing required {missing_variables} variable(s)"
            )
        self._data = weights

    def extract_regions(self, ds: xr.Dataset) -> xr.Dataset:
        """
        Regionalize input gridded data after multiplying 'ds' by weights and summing the product within each region.

        'ds' must have "lat", "lon" coordinates exactly matching "lat", "lon" in weights.
        """
        # TODO: See how this errors in different common scenarios. What happens on the
        # unhappy path?
        # Selecting with the weights' "lat"/"lon" DataArrays does pointwise
        # (vectorized) selection: one grid cell per entry of the weights.
        region_sel = ds.sel(lat=self._data["lat"], lon=self._data["lon"])
        out = (region_sel * self._data["weight"]).groupby(self._data["region"]).sum()
        # TODO: Maybe drop lat/lon and set 'region' as dim/coord? I feel like we can do
        # this because we're asking weights to strictly match input's lat/lon. Maybe
        # make this a req of segment weights we're reading in?
        return out
133
+
134
+
135
def extract_regions(
    ds: xr.Dataset, *, workflow: ExtractionWorkflow, regions: RegionExtractor
) -> xr.Dataset:
    """
    Use transformations in 'workflow' to extract 'regions' from gridded dataset, 'ds', returning a regionalized dataset

    This is more than plain zonal aggregation: the workflow's pre-extraction
    transformation runs first on the gridded input, 'regions' then performs the
    regionalization, and the workflow's post-extraction transformation produces
    the final dataset and variables.

    Parameters
    ----------
    ds : xr.Dataset
        Gridded input dataset.
    workflow : ExtractionWorkflow
        Pre/post regionalization transformations.
    regions : RegionExtractor
        Performs the actual regionalization step.

    Returns
    -------
    xr.Dataset
        Regionalized, transformed dataset.

    See Also
    --------
    build_extraction_workflow: Quickly build extraction workflow from functions for regionalization.
    """
    prepared = workflow.pre_extract(ds)
    regionalized = regions.extract_regions(prepared)
    return workflow.post_extract(regionalized)
148
+
149
+
150
class ProjectionWorkflow(Protocol):
    """
    Template for projecting a model with pre and post processing.

    The module-level `project` function applies the three stages in order:
    `pre_project`, then `project`, then `post_project`.

    See Also
    --------
    build_projection_workflow: Build a projection workflow from simple functions.
    """

    def pre_project(self, d: xr.Dataset) -> xr.Dataset:
        """
        Pre-process a dataset before projection

        Parameters
        ----------
        d : xr.Dataset
            Input dataset of predictors.

        Returns
        -------
        xr.Dataset
            Pre-processed dataset, ready for projection.
        """
        ...

    def project(self, d: xr.Dataset) -> xr.Dataset:
        """
        Create a projection from a dataset

        Parameters
        ----------
        d : xr.Dataset
            Pre-processed dataset of predictors.

        Returns
        -------
        xr.Dataset
            Projected dataset.
        """
        ...

    def post_project(self, d: xr.Dataset) -> xr.Dataset:
        """
        Process a projected dataset

        Parameters
        ----------
        d : xr.Dataset
            Projected dataset.

        Returns
        -------
        xr.Dataset
            Final post-processed projection.
        """
        ...
176
+
177
+
178
# This dataclass is a quick and simple way to get a concrete instance of the protocol.
@dataclass(frozen=True)
class _SimpleProjectionWorkflow(ProjectionWorkflow):
    # Plain callables stored as (frozen) instance attributes stand in for the
    # protocol's method names: `instance.project(ds)` invokes the stored
    # function directly, with no bound `self`.
    pre_project: Callable[[xr.Dataset], xr.Dataset]
    project: Callable[[xr.Dataset], xr.Dataset]
    post_project: Callable[[xr.Dataset], xr.Dataset]
184
+
185
+
186
def build_projection_workflow(
    *,
    pre: Callable[[xr.Dataset], xr.Dataset],
    project: Callable[[xr.Dataset], xr.Dataset],
    post: Callable[[xr.Dataset], xr.Dataset],
) -> ProjectionWorkflow:
    """
    Use simple functions to quickly build a model to project effects, impacts and/or damages.

    This function is a quick and simple way to build a ProjectionWorkflow from three simple functions.

    Parameters
    ----------
    pre : Callable[[xr.Dataset], xr.Dataset]
        Pre-processing step applied before projection.
    project : Callable[[xr.Dataset], xr.Dataset]
        Projection step, the model itself.
    post : Callable[[xr.Dataset], xr.Dataset]
        Post-processing step applied to the projected result.

    Returns
    -------
    ProjectionWorkflow
        Workflow exposing the three functions as its projection stages.

    See Also
    --------
    project: Apply a projection workflow to a dataset.
    ProjectionWorkflow: Technical ProjectionWorkflow protocol.
    """
    return _SimpleProjectionWorkflow(
        pre_project=pre,
        project=project,
        post_project=post,
    )
206
+ )
207
+
208
+
209
def project(d: xr.Dataset, *, model: ProjectionWorkflow) -> xr.Dataset:
    """
    Project a dataset of predictors, 'd', with 'model' to return a projected dataset

    Runs the model's three stages in sequence: pre-processing, projection,
    then post-processing.

    Parameters
    ----------
    d : xr.Dataset
        Input dataset of predictors.
    model : ProjectionWorkflow
        Workflow whose stages are applied to 'd'.

    Returns
    -------
    xr.Dataset
        The post-processed projection.

    See Also
    --------
    build_projection_workflow: Build a projection workflow from simple functions.
    ProjectionWorkflow: Technical ProjectionWorkflow protocol.
    """
    return model.post_project(model.project(model.pre_project(d)))
File without changes