cog-worker 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cog_worker/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """A python module for scalable analysis of Cloud Optimized GeoTIFFs.
2
+
3
+ COG Worker is a simple library to help you chunk and run large scale analysis
4
+ on Cloud Optimized GeoTIFFs (COGS).
5
+ """
6
+
7
+ __version__ = "0.3.0"
8
+
9
+ from .worker import Worker # noqa
10
+ from .manager import Manager # noqa
11
+ from .types import * # noqa
@@ -0,0 +1,176 @@
1
+ """Distributed processing with Dask.
2
+
3
+ The DaskManager class provides an identical interface to the
4
+ :obj:`cog_worker.manager.Manager`, but executes tasks in a
5
+ `Dask cluster <https://distributed.dask.org/>`_, instead of on your local machine.
6
+
7
+ Note:
8
+ cog_worker does not include dask.distributed as a dependency by default.
9
+ In order to use cog_worker.distributed you must install dask.distributed::
10
+
11
+ pip install dask[distributed]
12
+
13
+ Example:
14
+ Read a COG in chunks and sum the results::
15
+
16
+ from cog_worker.distributed import DaskManager
17
+ from dask.distributed import Client, LocalCluster
18
+
19
+ def my_analysis(worker):
20
+ arr = worker.read('example-cog.tif')
21
+ return arr.sum()
22
+
23
+ cluster = LocalCluster()
24
+ client = Client(cluster)
25
+ manager = DaskManager(client)
26
+
27
+ results = manager.chunk_execute(my_analysis)
28
+ total = sum(results)
29
+ """
30
+
31
+ import logging
32
+ from typing import Any, Iterable, Iterator, Mapping, Tuple, Union
33
+
34
+ import dask
35
+ import dask.distributed
36
+ from dask.delayed import Delayed
37
+ from pyproj import Proj
38
+
39
+ import cog_worker
40
+ from cog_worker.types import BoundingBox, WorkerFunction
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
+ class DaskManager(cog_worker.manager.Manager):
46
+ """Class for chunking and executing cog_worker functions in a dask cluster.
47
+
48
+ The DaskManager is identical to the cog_worker.manager.Manager, except that it
49
+ executes functions in a Dask cluster instead of locally.
50
+ """
51
+
52
def __init__(
    self,
    dask_client: dask.distributed.Client,
    bounds: BoundingBox = (-180, -85, 180, 85),
    proj: Union[int, str, Proj] = 3857,
    scale: float = 10000,
    buffer: int = 16,
):
    """Create a manager that runs its analysis through ``dask_client``.

    Args:
        dask_client (dask.distributed.Client): Client used to submit and
            compute tasks.
        bounds (BoundingBox): The region to be analyzed.
        proj (pyproj.Proj, str, int): The projection to analyze in. Any
            value accepted by ``pyproj.Proj`` (proj4 string, WKT, EPSG
            code, ...).
        scale (float): Pixel size in the projection's units (usually
            meters or degrees).
        buffer (int): Number of extra pixels read on every side of a chunk
            to avoid edge effects. The right value depends on the analysis
            (e.g. convolutions or distance functions).
    """
    self.client = dask_client
    # The base Manager stores bounds/proj/scale/buffer and derives the
    # projected bounds from them.
    super().__init__(bounds=bounds, proj=proj, scale=scale, buffer=buffer)
78
+
79
def execute(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    clip: bool = True,
    compute: bool = True,
    **kwargs,
) -> Union[Tuple[Any, BoundingBox], Delayed]:
    """Run a cog_worker function as a single task in the dask cluster.

    The call is wrapped with ``dask.delayed`` around
    ``cog_worker.manager._execute``, using the manager's ``bounds``,
    ``proj``, ``scale`` and ``buffer`` unless they are overridden through
    ``**kwargs``. By default the function therefore runs once over the
    manager's whole bounding box.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        clip (bool): Whether or not to clip the ``buffer`` from the
            completed analysis.
        compute (bool): Whether or not to compute the task immediately.
        **kwargs: Overrides for the manager's properties (bounds, proj,
            scale, or buffer).

    Returns:
        When ``compute`` is true, the result of the computed task: a tuple
        of the function's return value and the bounding box of the executed
        analysis. Otherwise the un-computed ``Delayed`` task.
    """
    # Manager defaults come first so caller-supplied kwargs take precedence.
    call_kwargs = {
        "f": f,
        "f_args": f_args,
        "f_kwargs": f_kwargs,
        "bounds": self.bounds,
        "proj": self.proj,
        "scale": self.scale,
        "buffer": self.buffer,
        "clip": clip,
        **kwargs,
    }
    task = dask.delayed(cog_worker.manager._execute)(**call_kwargs)
    if not compute:
        return task
    # Submit to the cluster and block until the result is available.
    return self.client.compute(task).result()  # type: ignore
133
+
134
def chunk_execute(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    chunksize: int = 512,
    compute: bool = True,
) -> Union[Iterator[Tuple[Any, BoundingBox]], Iterator[Delayed]]:  # type: ignore
    """Compute chunks in parallel in the DaskManager's cluster.

    This method is a generator: no task is built or submitted until the
    caller starts iterating. With ``compute=True`` results are yielded as
    they complete, so their order is not guaranteed.

    Note:
        You can estimate the memory requirement of executing a function at a
        given chunksize as:
        ``(chunksize + 2*buffer)**2 * number_of_bands_or_arrays * bit_depth``.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        chunksize (int): Size of the chunks in pixels (excluding buffer).
        compute (bool): Whether or not to compute the chunks immediately.

    Yields:
        For each chunk, a tuple of the function's return value and the
        bounding box of the executed analysis; or, if ``compute`` is False,
        the un-computed ``Delayed`` task for each chunk.
    """
    tasks = [
        dask.delayed(cog_worker.manager._execute)(f, f_args, f_kwargs, **params)
        for params in self.chunk_params(chunksize)
    ]

    if not compute:
        # A plain ``return tasks`` inside a generator only sets
        # StopIteration.value, so callers iterating the result would get
        # nothing. Yield the delayed tasks instead.
        yield from tasks
        return

    futures = self.client.compute(tasks)
    for future, result in dask.distributed.as_completed(futures, with_results=True):
        # The result is already in hand; release the future before handing
        # the result to the caller.
        future.release()
        yield result
cog_worker/manager.py ADDED
@@ -0,0 +1,424 @@
1
+ """Previewing, chunking, and executing analysis.
2
+
3
+ The Manager class is used to divide an area of analysis into chunks of manageable size,
4
+ and execute functions on each of these chunks.
5
+
6
+ When executing functions, the Manager instantiates a :obj:`cog_worker.worker.Worker` and passes
7
+ it to the function as its first parameter. The Worker keeps track of the scale, projection,
8
+ and bounds of its piece of the analysis, which it uses to handle the reading and writing of
9
+ Cloud Optimized GeoTIFFs.
10
+
11
+ Example:
12
+ Use the manager to preview an analysis before executing it::
13
+
14
+ from cog_worker import Manager
15
+ from rasterio.plot import show
16
+ def my_analysis(worker):
17
+ arr = worker.read('example-cog.tif')
18
+ # calculations ...
19
+ return arr
20
+
21
+ manager = Manager()
22
+ arr, bbox = manager.preview(my_analysis)
23
+ show(arr)
24
+
25
+ Execute the analysis in chunks, saving the results to disk::
26
+
27
+ manager.chunk_save('output.tif', my_analysis)
28
+
29
+ """
30
+
31
+ import logging
32
+ import math
33
+ from typing import IO, Any, Iterable, Iterator, Mapping, Tuple, Type, Union
34
+
35
+ import morecantile
36
+ import numpy as np
37
+ import rasterio as rio
38
+ import rasterio.transform
39
+ import rasterio.windows
40
+ from pyproj import Proj
41
+ from rasterio.io import DatasetWriter
42
+
43
+ import cog_worker.worker
44
+
45
+ from .types import BoundingBox, WorkerFunction
46
+ from .utils import _bbox_size, _get_profile
47
+
48
+ logger = logging.getLogger(__name__)
49
+
50
+
51
+ class Manager:
52
+ """Class for managing scalable analysis of Cloud Optimized GeoTIFFs."""
53
+
54
def __init__(
    self,
    bounds: BoundingBox = (-180, -85, 180, 85),
    proj: Union[int, str, Proj] = 3857,
    scale: float = 10000,
    buffer: int = 16,
):
    """Set up the region, projection and resolution of the analysis.

    Besides storing the arguments, this transforms ``bounds`` into the
    target projection (kept as ``_proj_bounds``) and builds a custom
    morecantile ``TileMatrixSet`` over those projected bounds (``tms``).

    Args:
        bounds (BoundingBox): The region to be analyzed as a (west, south,
            east, north) tuple.
        proj (pyproj.Proj, str, int): The projection to analyze in. Any
            value accepted by ``pyproj.Proj`` (proj4 string, WKT, EPSG
            code, ...). Non-``Proj`` values are converted with
            ``preserve_units=False``.
        scale (float): Pixel size in the projection's units (usually
            meters or degrees).
        buffer (int): Number of extra pixels read on every side of a chunk
            to avoid edge effects. The right value depends on the analysis
            (e.g. convolutions or distance functions).
    """
    if not isinstance(proj, Proj):
        proj = Proj(proj, preserve_units=False)
    self.proj = proj
    self.bounds = bounds
    self.scale = scale
    self.buffer = buffer

    projected = self.proj.transform_bounds(*bounds)
    self._proj_bounds = projected
    self.tms = morecantile.TileMatrixSet.custom(list(projected), self.proj.crs)
81
+
82
def execute(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    clip: bool = True,
    **kwargs,
) -> Tuple[Any, BoundingBox]:
    """Run a function that takes a cog_worker.worker.Worker as its first parameter.

    This is the method every other execution helper builds on. It delegates
    to the module-level ``_execute`` with the manager's ``bounds``, ``proj``,
    ``scale`` and ``buffer``, each of which can be replaced through
    ``**kwargs``. With no overrides the function runs once for the manager's
    scale and bounding box.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        clip (bool): Whether or not to clip the buffer from the completed
            analysis.
        **kwargs: Overrides for the manager's properties (bounds, proj,
            scale, or buffer).

    Returns:
        A tuple containing the return value of the function and the bounding
        box of the executed analysis in the target projection.
    """
    # Defaults first; anything the caller passes in kwargs wins.
    worker_kwargs = {
        "bounds": self.bounds,
        "proj": self.proj,
        "scale": self.scale,
        "buffer": self.buffer,
        **kwargs,
    }
    return _execute(f, f_args, f_kwargs, clip, **worker_kwargs)
121
+
122
def preview(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    bounds: Union[BoundingBox, None] = None,
    max_size: int = 1024,
    **kwargs,
) -> Tuple[Any, BoundingBox]:
    """Preview a function by executing it at a scale that fits ``max_size``.

    The scale passed to ``execute`` is the manager's scale multiplied by
    ``longest_side_in_pixels / max_size``, so the longer side of the
    previewed region is roughly ``max_size`` pixels.

    Args:
        f (WorkerFunction): The function to execute. It will receive a
            cog_worker.worker.Worker as its first argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        bounds (BoundingBox, default: self.bounds): The region to analyze.
        max_size (int): The maximum size (width or height) in pixels to
            compute, ignoring any buffer (default: 1024px).
        **kwargs: Overrides for the manager's properties (proj or buffer).
            A ``scale`` or ``proj_bounds`` entry is replaced by the values
            computed here.

    Returns:
        A tuple containing the return value of the function and the bounding
        box of the executed analysis in the target projection.
    """
    bounds = self.bounds if bounds is None else bounds

    proj = kwargs.pop("proj", self.proj)
    if not isinstance(proj, Proj):
        proj = Proj(proj, preserve_units=False)

    # Project the bounds with the projection that will actually be used.
    # Using self.proj here would hand the worker bounds expressed in the
    # wrong CRS whenever ``proj`` is overridden.
    proj_bounds = proj.transform_bounds(*bounds)

    width, height = _bbox_size(proj_bounds, self.scale)
    scale = self.scale * max(width, height) / max_size

    kwargs.update({"proj_bounds": proj_bounds, "proj": proj, "scale": scale})
    return self.execute(f, f_args, f_kwargs, **kwargs)
161
+
162
def tile(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    z: int = 0,
    x: int = 0,
    y: int = 0,
    tilesize: int = 256,
    **kwargs,
) -> Tuple[Any, BoundingBox]:
    """Execute a function for the scale and bounds of a TMS tile.

    The tile's bounds come from the manager's morecantile tile matrix set
    (``self.tms``), which supports non-global and non-mercator tiling
    schemes. To generate standard web tiles, instantiate the Manager with
    the default parameters.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        z (int): Zoom level of the tile.
        x (int): Column index of the tile.
        y (int): Row index of the tile.
        tilesize (int): Size of the tile in pixels; the scale is the longer
            side of the tile's bounds divided by this value.
        **kwargs: Overrides for the manager's properties (buffer). A
            ``scale`` or ``proj_bounds`` entry is replaced by the values
            computed here.

    Returns:
        A tuple containing the return value of the function and the bounding
        box of the executed analysis in the target projection.
    """
    tile_bounds = self.tms.xy_bounds(x, y, z)  # type: ignore
    west, south, east, north = tile_bounds

    # One pixel covers the tile's longer side divided by the tile size.
    longest_side = max(east - west, north - south)
    kwargs.update(proj_bounds=tile_bounds, scale=longest_side / tilesize)

    return self.execute(f, f_args, f_kwargs, **kwargs)
207
+
208
def chunk_execute(
    self,
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    chunksize: int = 512,
) -> Iterator[Tuple[Any, BoundingBox]]:
    """Lazily execute a function on chunks of at most ``chunksize`` pixels.

    Note:
        Chunks are computed one after another, as the generator is consumed,
        trading time for a reduced memory footprint. To run large scale
        analysis in parallel using dask, see cog_worker.distributed.

    Note:
        You can estimate the memory requirement of executing a function at a
        given chunksize as
        ``(chunksize + 2*buffer)**2 * number_of_bands_or_arrays * bit_depth``.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        chunksize (int): Size of the chunks in pixels (excluding buffer).

    Yields:
        A tuple containing the return value of the function and the bounding
        box of the executed analysis in the target projection.
    """
    # The generator expression keeps evaluation lazy: each chunk is only
    # executed when the consumer asks for it.
    yield from (
        self.execute(f, f_args, f_kwargs, **chunk_kwargs)
        for chunk_kwargs in self.chunk_params(chunksize)
    )
238
+
239
def chunk_save(
    self,
    dst: Union[str, IO],
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    chunksize: int = 512,
    **kwargs,
):
    """Execute a function in chunks and write each chunk to disk as it is completed.

    The chunk_save method is identical to Manager.chunk_execute, except it
    writes results to ``dst`` instead of yielding them. The writer is opened
    through ``Manager._open_writer``; its band count and dtype are taken
    from the first chunk.

    Note:
        The function to be executed will receive a cog_worker.worker.Worker
        as its first argument and should return a numpy array of
        ``chunksize`` (optionally plus the buffer pixels): either
        3-dimensional ``(bands, rows, cols)`` or 2-dimensional
        ``(rows, cols)``, which is written as a single band. e.g.::

            # Read a cog in chunks and write those chunks to 'test.tif'
            manager.chunk_save('test.tif', lambda worker: worker.read('example-cog-url.tif'))

    Args:
        dst (str): The file path to write to.
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        chunksize (int): Size of the chunks in pixels (excluding buffer).
        **kwargs: Additional keyword arguments to pass to rasterio.open.

    Raises:
        ValueError: If the manager produces no chunks to write.
    """
    chunks = self.chunk_execute(f, f_args, f_kwargs, chunksize)

    # The first chunk determines the output's band count and dtype, so it
    # has to be computed before the writer can be opened.
    first = next(chunks, None)
    if first is None:
        raise ValueError("No chunks to write: the manager's bounds produced no chunks.")
    arr, bbox = first

    # _write_chunk treats a 2-D array as one band; for such arrays shape[0]
    # is the row count, not the band count.
    count = 1 if len(arr.shape) == 2 else arr.shape[0]

    with self._open_writer(dst, count, arr.dtype, **kwargs) as _writer:
        self._write_chunk(_writer, arr, bbox)
        for arr, bbox in chunks:
            self._write_chunk(_writer, arr, bbox)
277
+
278
def _open_writer(self, dst: Union[str, IO], count: int, dtype: Type, **kwargs) -> DatasetWriter:
    """Open ``dst`` for writing with the manager's default raster profile.

    The profile is built by ``_get_profile`` from the manager's scale,
    projected bounds and projection; ``**kwargs`` are forwarded to it.
    """
    return rio.open(  # type: ignore
        dst,
        "w",
        **_get_profile(count, self.scale, self._proj_bounds, self.proj, dtype, **kwargs),
    )
283
+
284
def _write_chunk(
    self,
    writer: DatasetWriter,
    arr: np.ndarray,
    bbox: BoundingBox,
):
    """Write one chunk into ``writer`` at the position given by ``bbox``.

    Args:
        writer (DatasetWriter): The open dataset to write into.
        arr (np.ndarray): The chunk's data, 2-D or 3-D. A 2-D array is
            written as a single band. For masked arrays the mask is written
            as well.
        bbox (BoundingBox): The chunk's (left, bottom, right, top) bounds;
            only the top-left corner is used to place the window.
    """
    # Promote a 2-D array to a 3-D array with one leading band axis.
    if len(arr.shape) == 2:
        arr = arr[np.newaxis]
    n_rows, n_cols = arr.shape[1:]

    # Locate the chunk's top-left corner in the writer's pixel grid.
    west, _south, _east, north = bbox
    row_idx, col_idx = rasterio.transform.rowcol(
        writer.transform,
        [west],
        [north],
        op=round,
    )
    window = rasterio.windows.Window(min(col_idx), min(row_idx), n_cols, n_rows)

    writer.write(arr, window=window)
    if not isinstance(arr, np.ma.MaskedArray):
        return

    invalid = np.ma.getmask(arr)
    if len(invalid.shape) == 3:
        # Collapse the per-band masks: a pixel counts as masked if it is
        # masked in any band.
        invalid = np.any(invalid, axis=0)
    # Inverted so that True marks the pixels that are NOT masked.
    writer.write_mask(~invalid, window=window)
310
+
311
def chunk_params(self, chunksize: int = 512, **kwargs):
    """Generate the keyword arguments needed to execute a function chunk by chunk.

    Each yielded dict holds the projection (as its ``srs`` string), scale
    and buffer of the manager, any extra ``**kwargs``, and a ``proj_bounds``
    entry for one chunk. The dicts differ only in ``proj_bounds``. They can
    be passed to Manager.execute, which is useful for handing tasks to
    workers that execute in parallel.

    Note:
        ``manager.chunk_execute(f)`` is equivalent to
        ``(manager.execute(f, **params) for params in manager.chunk_params())``

    Args:
        chunksize (int): Size of the chunks in pixels (excluding buffer).
        **kwargs: Optional additional entries to store in every dict (to
            eventually pass to Manager.execute), e.g. ``f``, ``f_args``,
            ``f_kwargs``.

    Yields:
        Dicts of keyword arguments that can be passed to
        :obj:`cog_worker.manager.Manager.execute()`.
    """
    shared = {
        "proj": self.proj.srs,
        "scale": self.scale,
        "buffer": self.buffer,
        **kwargs,
    }
    for chunk_bounds in self.chunks(chunksize):
        # A fresh dict per chunk, so consumers can mutate one safely.
        yield {**shared, "proj_bounds": chunk_bounds}
342
+
343
def chunks(self, chunksize: int = 512) -> Iterator[BoundingBox]:
    """Generate bounding boxes for chunks of at most ``chunksize`` pixels.

    The manager's projected bounding box is divided into a grid of chunks,
    walked column by column. Each chunk is at most ``chunksize`` pixels on a
    side; its geographic extent depends on the manager's projection and
    scale. Chunks whose bounds contain a non-finite value are skipped.

    Args:
        chunksize (int): Size of the chunks in pixels (excluding buffer).

    Yields:
        BoundingBox: The bounding box of the chunk in the Manager's projection
    """
    n_cols, n_rows = self._num_chunks(chunksize)
    candidates = (
        self._chunk_bounds(col, row, chunksize)
        for col in range(n_cols)
        for row in range(n_rows)
    )
    for box in candidates:
        if not np.isfinite(box).all():
            continue
        yield box
362
+
363
+ def _chunk_bounds(
364
+ self,
365
+ x: int,
366
+ y: int,
367
+ chunksize: int,
368
+ ) -> BoundingBox:
369
+ """Get the bounding box of a chunk with index <x>,<y>."""
370
+ left, bottom, right, top = self._proj_bounds
371
+ _chunksize = chunksize * self.scale
372
+
373
+ l = left + x * _chunksize # noqa: E741
374
+ r = min(l + _chunksize, right)
375
+ t = top - y * _chunksize
376
+ b = max(t - _chunksize, bottom)
377
+
378
+ return (l, b, r, t)
379
+
380
+ def _num_chunks(
381
+ self,
382
+ chunksize: int,
383
+ ) -> Tuple[int, int]:
384
+ """Return the number of chunks necessary to cover the Manager's bounding box."""
385
+ left, bottom, right, top = self._proj_bounds
386
+ return (
387
+ math.ceil((right - left) / self.scale / chunksize),
388
+ math.ceil((top - bottom) / self.scale / chunksize),
389
+ )
390
+
391
+
392
def _execute(
    f: WorkerFunction,
    f_args: Union[Iterable, None] = None,
    f_kwargs: Union[Mapping, None] = None,
    clip: bool = True,
    **kwargs,
) -> Tuple[Any, BoundingBox]:
    """Build a Worker, call ``f`` with it, and return the result with the worker's bounds.

    Args:
        f (:obj:`cog_worker.types.WorkerFunction`): The function to execute.
            It will receive a cog_worker.worker.Worker as its first
            argument.
        f_args (list): Additional positional arguments for ``f``.
        f_kwargs (dict): Additional keyword arguments for ``f``.
        clip (bool): Whether or not to clip the buffer from the completed
            analysis. Only applied when ``f`` returns a numpy array.
        **kwargs: Keyword arguments used to instantiate the
            cog_worker.worker.Worker.

    Returns:
        A tuple containing the return value of the function (clipped with
        ``Worker.clip_buffer`` when applicable) and the worker's ``bounds``.
    """
    worker = cog_worker.worker.Worker(**kwargs)

    if f_args is None:
        f_args = ()
    if f_kwargs is None:
        f_kwargs = {}

    result: np.ndarray = f(worker, *f_args, **f_kwargs)  # type: ignore
    # Non-array results (e.g. scalars) are passed through untouched.
    if clip and isinstance(result, np.ndarray):
        result = worker.clip_buffer(result)

    return result, worker.bounds
cog_worker/py.typed ADDED
File without changes
cog_worker/types.py ADDED
@@ -0,0 +1,56 @@
1
+ """cog_worker type definitions."""
2
+
3
+ from typing import Callable, Tuple, Union
4
+
5
+ import numpy as np
6
+
7
+ import cog_worker.worker
8
+
9
+ BoundingBox = Tuple[float, float, float, float]
10
+ """A ``(west, south, east, north)`` tuple."""
11
+
12
+ WorkerFunction = Union[
13
+ Callable[["cog_worker.worker.Worker"], np.ndarray],
14
+ Callable,
15
+ ]
16
+ """A function that can receive a cog_worker.worker.Worker as its first parameter.
17
+
18
+ Additional arguments and keyword arguments can be passed to the Worker function
19
+ at time of execution with the ``f_args`` and ``f_kwargs`` parameters of
20
+ :obj:`cog_worker.manager.Manager.execute()`
21
+
22
+ Example:
23
+
24
+ Read a specific COG and return it as an array::
25
+
26
+ def my_analysis(worker: cog_worker.Worker):
27
+ arr = worker.read('example-cog.tif')
28
+ return arr
29
+
30
+ Read a COG at a given url and get the neighborhood mean for a 1km square kernel::
31
+
32
+ from scipy.ndimage import uniform_filter
33
+
34
+ def my_analysis(worker: cog_worker.Worker, source_url: str):
35
+ arr = worker.read(source_url)
36
+ kernel_size = 1000/worker.scale # 1000 map units (meters) converted to pixels
37
+ return uniform_filter(arr, kernel_size)
38
+
39
+ Read a COG and optionally upload each chunk to an S3 bucket as it is computed::
40
+
41
+ from rasterio import MemoryFile
42
+ import boto3
43
+
44
+ def my_analysis(worker: cog_worker.Worker, dst_bucket: str):
45
+ arr = worker.read('example-cog.tif')
46
+
47
+ if dst_bucket:
48
+ with MemoryFile() as memfile:
49
+ fname = f'output_{worker.scale}_{worker.bounds[0]}_{worker.bounds[3]}.tif'
50
+ worker.write(arr, memfile)
51
+
52
+ memfile.seek(0)
53
+ boto3.client('s3').upload_fileobj(memfile, dst_bucket, fname)
54
+
55
+ return arr
56
+ """
cog_worker/utils.py ADDED
@@ -0,0 +1,40 @@
1
+ """Utility functions."""
2
+
3
+ from typing import Tuple, Type, Union
4
+
5
+ import numpy as np
6
+ from pyproj import Proj
7
+ from rasterio import transform
8
+
9
+ from .types import BoundingBox
10
+
11
+
12
def _get_profile(
    count: int, scale: float, proj_bounds: BoundingBox, proj: Proj, dtype: Union[Type, np.dtype], **kwargs
) -> dict:
    """Build the keyword arguments used to open a tiled, LZW-compressed GeoTIFF.

    Args:
        count: Number of bands.
        scale: Pixel size, used for both axes of the affine transform.
        proj_bounds: (left, bottom, right, top) bounds; the transform's
            origin is the top-left corner and the raster size is derived
            from the bounds and ``scale``.
        proj: Projection whose ``srs`` string is used as the CRS.
        dtype: Data type of the raster.
        **kwargs: Entries that are added to, or override, the defaults.

    Returns:
        The profile dict.
    """
    n_cols, n_rows = _bbox_size(proj_bounds, scale)
    left, top = proj_bounds[0], proj_bounds[3]
    return {
        "driver": "GTiff",
        "interleave": "pixel",
        "blockxsize": 512,
        "blockysize": 512,
        "tiled": True,
        "compress": "lzw",
        "crs": proj.srs,
        "transform": transform.from_origin(left, top, scale, scale),
        "dtype": dtype,
        "width": n_cols,
        "height": n_rows,
        "count": count,
        # Caller-supplied values take precedence over the defaults above.
        **kwargs,
    }
33
+
34
+
35
+ def _bbox_size(
36
+ bounds: BoundingBox,
37
+ scale: float,
38
+ ) -> Tuple[int, int]:
39
+ left, bottom, right, top = bounds
40
+ return (round((right - left) / scale), round((top - bottom) / scale))
cog_worker/worker.py ADDED
@@ -0,0 +1,277 @@
1
+ """Reading COGs.
2
+
3
+ The Worker class keeps track of the region, projection, and scale to
4
+ conduct analysis in.
5
+
6
+ When writing cog_worker functions, the main method you will use is
7
+ :obj:`Worker.read()`, which is a wrapper around ``rio_tiler`` to clip,
8
+ reproject and resample the data into the target resolution.
9
+
10
+ Example:
11
+ Read a COG, reprojecting it onto a global 1-deg lat-long grid::
12
+
13
+ from cog_worker import Worker
14
+ from rasterio.plot import show
15
+
16
+ worker = Worker(bounds=(-180, -90, 180, 90), proj=4326, scale=1.0)
17
+ arr = worker.read('example-cog-url.tif')
18
+ show(arr)
19
+ """
20
+
21
+ import logging
22
+ from collections.abc import Sequence
23
+ from typing import Union
24
+
25
+ import numpy as np
26
+ import rasterio as rio
27
+ from pyproj import Proj
28
+ from pyproj.enums import TransformDirection
29
+ from rio_tiler.errors import EmptyMosaicError
30
+ from rio_tiler.io import COGReader
31
+ from rio_tiler.models import ImageData
32
+ from rio_tiler.mosaic.reader import mosaic_reader
33
+
34
+ from cog_worker.types import BoundingBox
35
+ from cog_worker.utils import _bbox_size, _get_profile
36
+
37
+ logger = logging.getLogger(__name__)
38
+
39
+
40
class Worker:
    """Class for reading Cloud Optimized GeoTIFFs.

    A Worker fixes the region, projection, pixel size and buffer used for an
    analysis, and offers helpers to read data into that grid (``read``), write
    results back out (``write``) and strip the buffer (``clip_buffer``).
    """

    def __init__(
        self,
        bounds: BoundingBox = (-180, -85, 180, 85),
        proj_bounds: Union[BoundingBox, None] = None,
        proj: Union[int, str, Proj] = 3857,
        scale: float = 10000,
        buffer: int = 16,
    ):
        """Initialize a Worker with a bounding box, scale, and projection.

        Args:
            bounds (BoundingBox): The region to be analyzed as a (west, south, east, north) tuple.
                Ignored when ``proj_bounds`` is provided.
            proj_bounds (BoundingBox): The region to be analyzed in the Worker's projection.
                Overrides ``bounds`` when provided.
            proj (pyproj.Proj, str, int): The projection to analyze in. See
                ``pyproj.Proj`` for valid values (https://pyproj4.github.io/pyproj/).
            scale (float): The pixel size for analysis in the projection's units (usually meters or degrees).
            buffer (int): The number of additional pixels to read on all sides to avoid edge effects.
                The ideal buffer size depends on your analysis (e.g. whether you plan to use convolutions or
                distance functions).
        """
        self._proj = proj if isinstance(proj, Proj) else Proj(proj, preserve_units=False)

        if proj_bounds is None:
            proj_bounds = self._proj.transform_bounds(*bounds)

        self._bounds = proj_bounds
        self._scale = scale
        self._buffer = buffer

        self._width, self._height = _bbox_size(self._bounds, scale)

    @property
    def proj(self) -> Proj:
        """The projection used for reading."""
        return self._proj

    @property
    def bounds(self) -> BoundingBox:
        """The bounding box in projected coordinates."""
        return self._bounds

    @property
    def scale(self) -> float:
        """The size of pixels in projection units."""
        return self._scale

    @property
    def buffer(self) -> int:
        """The number of additional pixels to read on all sides of the Worker's bounding box."""
        return self._buffer

    @property
    def width(self) -> int:
        """The width of the Worker's bounding box in pixels."""
        return self._width

    @property
    def height(self) -> int:
        """The height of the Worker's bounding box in pixels."""
        return self._height

    def xy_bounds(self, buffered: bool = False) -> BoundingBox:
        """Return the Worker's bounding box in projected coordinates.

        Args:
            buffered (bool): Buffer the worker's bounding box
        """
        return self._buffer_bbox() if buffered else self.bounds

    def lnglat_bounds(self, buffered: bool = False) -> BoundingBox:
        """Return the Worker's bounding box in geographic coordinates.

        Note:
            When using a projected coordinate system, the geographic bounding box
            that covers the Worker's projected extent may be larger
            than the bounding box used to instantiate the Worker.

        Args:
            buffered (bool): Buffer the Worker's bounding box.
        """
        # Number of extra points sampled along each edge when transforming the
        # bounds (``densify_pts`` in pyproj): one per pixel on the longest side,
        # capped at 10000.
        pts = max(self.width, self.height) + (buffered * self.buffer * 2) - 1
        pts = min(pts, 10000)
        bounds = self.xy_bounds(buffered)
        return self.proj.transform_bounds(*bounds, pts, direction=TransformDirection.INVERSE)

    def empty(self, mask: bool = False) -> np.ndarray:
        """Return a zeroed array covering the Worker's extent including the buffer.

        Args:
            mask (bool): Return a Numpy masked array with all pixels masked.
                Otherwise returns a standard Numpy array filled with zeros.

        Returns:
            An array of shape ``(1, height + 2 * buffer, width + 2 * buffer)``.
        """
        shape = (1, self.height + self.buffer * 2, self.width + self.buffer * 2)
        arr = np.zeros(shape)
        if mask:
            arr = np.ma.array(arr, mask=np.ones(shape, dtype=bool))
        return arr

    def read(self, src: Union[str, Sequence[str]], masked=True, **kwargs) -> Union[np.ndarray, np.ma.MaskedArray]:
        """Read a COG, reprojecting and clipping as necessary.

        The read method uses ``rio_tiler.COGReader`` to take advantage of the
        file structure and internal overviews in COGs, minimizing the amount of
        data that needs to be read and transferred when working at reduced resolutions.

        In general, any valid GDAL path can be read. This may be a url pointing to a COG, a local
        GeoTIFF or a GDAL virtual file system path. However, it may be very inefficient to
        read data sources that are not valid Cloud Optimized GeoTIFFs.

        If a list of data sources is provided, ``Worker.read`` will use ``rio_tiler.mosaic_reader``
        to mosaic the sources together.

        Note:
            The resampling method used to generate the COG's internal overviews will affect
            how it appears at reduced resolutions.

        Args:
            src (str, list): The data source to read or list of sources to mosaic.
            masked (bool): Return a Numpy masked array, otherwise ignore dataset mask.
            **kwargs: Additional keyword arguments to pass to ``rio_tiler.COGReader.part``
                or ``rio_tiler.mosaic_reader``. See: https://cogeotiff.github.io/rio-tiler/.

        Returns:
            A Numpy masked array (or a plain array when ``masked`` is False) containing the
            data for the Worker's bounding box and its buffer. When a list of sources yields
            an empty mosaic, the result of ``Worker.empty(mask=masked)`` is returned instead.

        Raises:
            TypeError: If ``src`` is neither a string nor a sequence.

        Note:
            The mask values of a Numpy masked array is the inverse of a GDAL (alpha) mask.
            A masked value of True corresponds to nodata or an alpha value of 0.
        """
        # Fail early with a clear error: without this check an unsupported source
        # type would match neither branch below and leave ``img`` unbound.
        if not isinstance(src, (str, Sequence)):
            raise TypeError(f"src must be a str or a sequence of str, got {type(src).__name__}")

        proj_bounds = self._buffer_bbox()
        width, height = _bbox_size(proj_bounds, self._scale)

        if isinstance(src, str):
            img = _read_cog(src, proj_bounds, self._proj.crs, width, height, **kwargs)
        else:
            try:
                img, _ = mosaic_reader(src, _read_cog, proj_bounds, self.proj.crs, width, height, **kwargs)
            except EmptyMosaicError:
                # Honour ``masked`` so the return type matches the non-empty case.
                return self.empty(mask=masked)

        arr = img.array
        return arr if masked else arr.data

    def write(self, arr: np.ndarray, dst: str, **kwargs):
        """Write a Numpy array to a GeoTIFF.

        The write method will create a GeoTIFF with a profile matching the Worker's properties.
        Uses ``rasterio.open`` under the hood.

        Args:
            arr (numpy.ndarray): The array to write. Must be 2 or 3-dimensional, with a width and
                height matching the Worker (including or excluding the buffer). If the array
                includes the Worker's buffer, the buffer will be clipped before writing.
            dst (str): The file path to write to.
            **kwargs: Additional keyword arguments to pass to rasterio.open
                See: https://rasterio.readthedocs.io/en/latest/topics/writing.html

        Raises:
            ValueError: If the array's shape does not match the Worker's width and height.
        """
        arr = self.clip_buffer(arr)
        count = arr.shape[0]
        profile = _get_profile(count, self.scale, self._bounds, self.proj, arr.dtype, **kwargs)

        with rio.open(dst, "w", **profile) as writer:
            writer.write(arr)
            if isinstance(arr, np.ma.MaskedArray):
                # getmaskarray always yields a full-shape boolean array, even when
                # the masked array carries no explicit mask (``nomask``).
                mask = np.ma.getmaskarray(arr)
                if mask.ndim == 3:  # noqa PLR2004
                    # One mask per dataset: a pixel is masked if any band masks it.
                    mask = np.any(mask, axis=0)
                # Numpy masks flag invalid pixels; the dataset mask flags valid ones.
                writer.write_mask(~mask)

    def clip_buffer(self, arr: np.ndarray) -> np.ndarray:
        """Clip the buffer pixels from an array if they exist.

        Args:
            arr (numpy.ndarray): The array to clip. A 2-dimensional array is promoted to
                3 dimensions with a leading band axis.

        Returns:
            The 3-dimensional array with buffer pixels removed.

        Raises:
            ValueError: If the array's shape does not match the Worker's width and height
        """
        if len(arr.shape) == 2:  # noqa PLR2004
            # single band flat array needs extra axis to have the same number of axis as
            # an rgb raster
            arr = arr[np.newaxis]
        buffer_width = self.width + self.buffer * 2
        buffer_height = self.height + self.buffer * 2
        _, h, w = arr.shape
        if w == self.width and h == self.height:
            return arr
        elif w == buffer_width and h == buffer_height:
            return arr[:, self.buffer : -self.buffer, self.buffer : -self.buffer]
        else:
            raise ValueError(
                f"Array not expected size. Was {w}x{h} expected {self.width}x{self.height} or {buffer_width}x{buffer_height}"  # noqa: E501
            )

    def _buffer_bbox(self) -> BoundingBox:
        """Returns the worker's bounding box extended by the buffered pixels."""
        l, b, r, t = self._bounds  # noqa: E741
        # Convert the buffer from pixels to projection units.
        _buffer = self.buffer * self.scale

        return (l - _buffer, b - _buffer, r + _buffer, t + _buffer)
252
+
253
+
254
def _read_cog(
    asset: str,
    proj_bounds: BoundingBox,
    crs: Union[str, int, Proj],
    width: int,
    height: int,
    **kwargs,
) -> ImageData:
    """Read part of a COG, warping and resampling to a target shape.

    Args:
        asset: Path or URL of the dataset to open.
        proj_bounds: Extent to read, expressed in ``crs``.
        crs: Coordinate reference system used both for ``proj_bounds`` and for the output.
        width: Output width in pixels.
        height: Output height in pixels.
        **kwargs: Additional keyword arguments forwarded to ``COGReader.part``.

    Returns:
        The image data returned by ``COGReader.part``.
    """
    with COGReader(asset) as reader:  # type: ignore
        image = reader.part(
            proj_bounds,
            bounds_crs=crs,
            dst_crs=crs,
            max_size=None,
            width=width,
            height=height,
            **kwargs,
        )
        # Workaround (2024-07-18), knowingly hacky:
        # Numpy masked arrays default to a fill_value of 999999 for every integer dtype, which
        # does not fit in 8- or 16-bit integers. Numpy < 2 tolerated the out-of-bounds conversion,
        # but numpy >= 2 rejects it, which breaks serialization and deserialization of masked
        # arrays in multiprocessing.
        #
        # ``fill_value`` is a property whose setter checks and casts the value when possible.
        # Re-assigning it to itself makes numpy silently overflow np.int64(999999) into an
        # arbitrary value that fits the dtype, so the array round-trips cleanly on numpy 2.x.
        # The resulting fill value is meaningless; this is accepted on the expectation that
        # the fill method is rarely or never used.
        image.array.fill_value = image.array.fill_value
        return image
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 Simbiotica. S.L.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,188 @@
1
+ Metadata-Version: 2.1
2
+ Name: cog_worker
3
+ Version: 0.3.0
4
+ Summary: Scalable geospatial analysis on Cloud Optimized GeoTIFFs.
5
+ Author-email: Francis Gassert <francis.gassert@vizzuality.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2021 Simbiotica. S.L.
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/vizzuality/cog_worker
29
+ Project-URL: Issues, https://github.com/vizzuality/cog_worker/issues
30
+ Keywords: cog,geotiff,raster,gdal,rasterio,dask
31
+ Classifier: Intended Audience :: Information Technology
32
+ Classifier: Intended Audience :: Science/Research
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.8
36
+ Classifier: Programming Language :: Python :: 3.9
37
+ Classifier: Programming Language :: Python :: 3.10
38
+ Classifier: Programming Language :: Python :: 3.11
39
+ Classifier: Programming Language :: Python :: 3.12
40
+ Classifier: Topic :: Scientific/Engineering :: GIS
41
+ Requires-Python: >=3.9
42
+ Description-Content-Type: text/markdown
43
+ License-File: LICENSE.txt
44
+ Requires-Dist: numpy >=1
45
+ Requires-Dist: pyproj >=3.0.0
46
+ Requires-Dist: rasterio >=1.3
47
+ Requires-Dist: morecantile <6.0.0,>=5.0.0
48
+ Requires-Dist: rio-tiler <7.0.0,>=6.0.0
49
+ Provides-Extra: dev
50
+ Requires-Dist: pre-commit ; extra == 'dev'
51
+ Requires-Dist: bump-my-version ; extra == 'dev'
52
+ Provides-Extra: distributed
53
+ Requires-Dist: dask[distributed] ; extra == 'distributed'
54
+ Provides-Extra: docs
55
+ Requires-Dist: Sphinx ; extra == 'docs'
56
+ Requires-Dist: sphinxcontrib-napoleon ; extra == 'docs'
57
+ Requires-Dist: furo ; extra == 'docs'
58
+ Requires-Dist: nbsphinx ; extra == 'docs'
59
+ Requires-Dist: nbconvert ; extra == 'docs'
60
+ Provides-Extra: test
61
+ Requires-Dist: pytest ; extra == 'test'
62
+
63
+ # Cog Worker
64
+
65
+ Scalable geospatial analysis on Cloud Optimized GeoTIFFs.
66
+
67
+ - **Documentation**: https://vizzuality.github.io/cog_worker
68
+ - **PyPI**: https://pypi.org/project/cog-worker
69
+
70
+ cog_worker is a simple library to help write scripts to conduct scalable
71
+ analysis of gridded data. It's intended to be useful for moderate- to large-scale
72
+ GIS, remote sensing, and machine learning applications.
73
+
74
+ ## Installation
75
+
76
+ ```
77
+ pip install cog_worker
78
+ ```
79
+
80
+ ## Examples
81
+
82
+ See `docs/examples` for Jupyter notebook examples
83
+
84
+ ## Quick start
85
+
86
+ 0. A simple cog_worker script
87
+
88
+ ```python
89
+ from rasterio.plot import show
90
+ from cog_worker import Manager
91
+
92
+ def my_analysis(worker):
93
+ arr = worker.read('roads_cog.tif')
94
+ return arr
95
+
96
+ manager = Manager(proj='wgs84', scale=0.083333)
97
+ arr, bbox = manager.preview(my_analysis)
98
+ show(arr)
99
+ ```
100
+
101
+ 1. Define an analysis function that receives a cog_worker.Worker as the first parameter.
102
+
103
+ ```python
104
+ from cog_worker import Worker, Manager
105
+ import numpy as np
106
+
107
+ # Define an analysis function to read and process COG data sources
108
+ def MyAnalysis(worker: Worker) -> np.ndarray:
109
+
110
+ # 1. Read a COG (reprojecting, resampling and clipping as necessary)
111
+ array: np.ndarray = worker.read('roads_cog.tif')
112
+
113
+ # 2. Work on the array
114
+ # ...
115
+
116
+ # 3. Return (or post to blob storage etc.)
117
+ return array
118
+ ```
119
+
120
+ 2. Run your analysis in different scales and projections
121
+
122
+ ```python
123
+ import rasterio as rio
124
+
125
+ # Run your analysis using a cog_worker.Manager which handles chunking
126
+ manager = Manager(
127
+ proj = 'wgs84', # any pyproj string
128
+ scale = 0.083333, # in projection units (degrees or meters)
129
+ bounds = (-180, -90, 180, 90),
130
+ buffer = 128 # buffer pixels when chunking analysis
131
+ )
132
+
133
+ # preview analysis
134
+ arr, bbox = manager.preview(MyAnalysis, max_size=1024)
135
+ rio.plot.show(arr)
136
+
137
+ # preview analysis chunks
138
+ for bbox in manager.chunks(chunksize=1500):
139
+ print(bbox)
140
+
141
+ # execute analysis chunks sequentially
142
+ for arr, bbox in manager.chunk_execute(MyAnalysis, chunksize=1500):
143
+ rio.plot.show(arr)
144
+
145
+ # generate job execution parameters
146
+ for params in manager.chunk_params(chunksize=1500):
147
+ print(params)
148
+ ```
149
+
150
+ 3. Write scale-dependent functions
151
+
152
+ ```python
153
+ import scipy
154
+
155
+ def focal_mean(
156
+ worker: Worker,
157
+ kernel_radius: float = 1000 # radius in projection units (meters)
158
+ ) -> np.ndarray:
159
+
160
+ array: np.ndarray = worker.read('sample-geotiff.tif')
161
+
162
+ # Access the pixel size at worker.scale
163
+ kernel_size = kernel_radius * 2 / worker.scale
164
+ array = scipy.ndimage.uniform_filter(array, kernel_size)
165
+
166
+ return array
167
+ ```
168
+
169
+ 4. Chunk your analysis and run it in a dask cluster
170
+
171
+ ```python
172
+ from cog_worker.distributed import DaskManager
173
+ from dask.distributed import LocalCluster, Client
174
+
175
+ # Set up a Manager that connects to a Dask cluster
176
+ cluster = LocalCluster()
177
+ client = Client(cluster)
178
+ distributed_manager = DaskManager(
179
+ client,
180
+ proj = 'wgs84',
181
+ scale = 0.083333,
182
+ bounds = (-180, -90, 180, 90),
183
+ buffer = 128
184
+ )
185
+
186
+ # Execute in worker pool and save chunks to disk as they complete.
187
+ distributed_manager.chunk_save('output.tif', MyAnalysis, chunksize=2048)
188
+ ```
@@ -0,0 +1,12 @@
1
+ cog_worker/__init__.py,sha256=cMjDemK58ri18Sv9BJOyAz47hpcRH97bcdvFEh-103E,314
2
+ cog_worker/distributed.py,sha256=X3r-1Ef04nlyTZisMhMU1A6xZL6_NsAuCLAFAzjXncI,6900
3
+ cog_worker/manager.py,sha256=YB6BeoU2hqyTHrfO_vXVUQ3bTDMvBihAGG_X6_2O5Oc,17456
4
+ cog_worker/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ cog_worker/types.py,sha256=9UKK4Zie1XMbT_pHttRDC8Zb2kxkoxQ4xdfbJLM2cVo,1781
6
+ cog_worker/utils.py,sha256=jRGUecMR-0vzfiYoLL3uBz0eNRLGYE5py_pxRo76y1E,1018
7
+ cog_worker/worker.py,sha256=ckp0BH_isKKD-nPiwmVLKesvan5Iqs1srQ_ju3tbLN4,11352
8
+ cog_worker-0.3.0.dist-info/LICENSE.txt,sha256=Sd40qFfjMndidtlw_mKQ5TPBLSpk-wSFMtfDM1qTfoA,1073
9
+ cog_worker-0.3.0.dist-info/METADATA,sha256=eBsRiQCCRfvclSGDVTaPYGdlYgqlPBGRSrKH-7I9340,5999
10
+ cog_worker-0.3.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
11
+ cog_worker-0.3.0.dist-info/top_level.txt,sha256=jUWy8Vkc6yjVPFbIip1rX4W37aFxIAfpiVFs8mbmOr0,11
12
+ cog_worker-0.3.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (71.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ cog_worker