drb-chunk 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- drb/chunk/__init__.py +16 -0
- drb/chunk/_version.py +21 -0
- drb/chunk/chunk.py +140 -0
- drb/chunk/core.py +123 -0
- drb/chunk/descriptor.py +144 -0
- drb/chunk/exceptions.py +9 -0
- drb/chunk/interop.py +29 -0
- drb/chunk/model.py +88 -0
- drb/chunk/readers.py +70 -0
- drb/chunk/selection.py +165 -0
- drb/chunk/tiling.py +107 -0
- drb_chunk-0.2.0.dist-info/METADATA +576 -0
- drb_chunk-0.2.0.dist-info/RECORD +17 -0
- drb_chunk-0.2.0.dist-info/WHEEL +5 -0
- drb_chunk-0.2.0.dist-info/entry_points.txt +2 -0
- drb_chunk-0.2.0.dist-info/licenses/LICENCE.txt +165 -0
- drb_chunk-0.2.0.dist-info/top_level.txt +1 -0
drb/chunk/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from . import _version
|
|
2
|
+
from drb.chunk.exceptions import DrbChunkError, DrbSelectionError
|
|
3
|
+
from drb.chunk.selection import Selection, parse_selection
|
|
4
|
+
from drb.chunk.tiling import TilingScheme, RegularGrid
|
|
5
|
+
from drb.chunk.model import ChunkArray, ChunkRef, ChunkManifest
|
|
6
|
+
from drb.chunk.chunk import Chunk
|
|
7
|
+
from drb.chunk.core import ChunkAddon
|
|
8
|
+
from drb.chunk.interop import to_kerchunk
|
|
9
|
+
|
|
10
|
+
# Version string taken from the versioneer-generated _version module.
__version__ = _version.get_versions()["version"]

# Names re-exported as the package-level public API.
__all__ = [
    "Chunk",
    "ChunkAddon",
    "ChunkArray",
    "ChunkRef",
    "ChunkManifest",
    "Selection",
    "parse_selection",
    "TilingScheme",
    "RegularGrid",
    "to_kerchunk",
    "DrbChunkError",
    "DrbSelectionError",
    "__version__",
]
|
drb/chunk/_version.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
|
|
2
|
+
# This file was generated by 'versioneer.py' (0.29) from
|
|
3
|
+
# revision-control system data, or from the parent directory name of an
|
|
4
|
+
# unpacked source archive. Distribution tarballs contain a pre-generated copy
|
|
5
|
+
# of this file.
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
|
|
9
|
+
version_json = '''
|
|
10
|
+
{
|
|
11
|
+
"date": "2026-06-30T04:50:46+0000",
|
|
12
|
+
"dirty": false,
|
|
13
|
+
"error": null,
|
|
14
|
+
"full-revisionid": "c2cf2e26834f78f38f0a6a5162142494c001af69",
|
|
15
|
+
"version": "0.2.0"
|
|
16
|
+
}
|
|
17
|
+
''' # END VERSION_JSON
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_versions():
    """Return the version metadata embedded in this module.

    Decodes the module-level ``version_json`` string and returns the
    resulting dict.
    """
    version_info = json.loads(version_json)
    return version_info
|
drb/chunk/chunk.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# drb/chunk/chunk.py
|
|
2
|
+
from typing import Any, Iterable, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from drb.core.node import DrbNode
|
|
7
|
+
|
|
8
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
9
|
+
from drb.chunk.model import ChunkArray, ChunkRef, ChunkManifest
|
|
10
|
+
from drb.chunk.readers import select_reader
|
|
11
|
+
from drb.chunk.selection import Selection
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Chunk:
    """Lazy handle on one named chunked array attached to a DRB node.

    A Chunk bundles the array description, the manifest mapping tile keys
    to ChunkRef objects and an optional selection. Data access is delegated
    to a reader strategy in get_impl() / to_xarray().
    """

    def __init__(
            self, name: str, array: ChunkArray, node: DrbNode,
            manifest: ChunkManifest, *, selection: Optional[Selection] = None,
            reader: Optional[str] = None, topic_uri: Optional[str] = None,
            addon_data: Optional[dict] = None):
        """Store the chunk description; nothing is read here.

        :param name: chunk name.
        :param array: description of the chunked array.
        :param node: node the chunk data is read from.
        :param manifest: mapping of tile keys to ChunkRef objects.
        :param selection: selection this chunk was narrowed with, if any.
        :param reader: optional reader hint passed to select_reader().
        :param topic_uri: URI of the declaring topic, if known.
        :param addon_data: free-form extra data; a falsy value becomes {}.
        """
        self.name = name
        self.array = array
        self.node = node
        self._manifest = manifest
        self._selection = selection
        self._reader = reader
        self._topic_uri = topic_uri
        self.addon_data = addon_data or {}

    @property
    def grid_shape(self) -> Tuple[int, ...]:
        """Grid shape as reported by the underlying ChunkArray."""
        return self.array.grid_shape

    @property
    def manifest(self) -> ChunkManifest:
        """Manifest backing this chunk (already narrowed after select())."""
        return self._manifest

    def tiles(self) -> Iterable[Tuple[int, ...]]:
        """Return the tile keys present in the manifest."""
        return self._manifest.keys()

    def tile(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the ChunkRef the manifest holds for ``key``."""
        return self._manifest.ref(key)

    def select(self, selection: Selection) -> "Chunk":
        """Lazy narrowing: resolve keys via the scheme, subset the manifest,
        read nothing.

        :param selection: selection resolved by the array's tiling scheme.
        :return: a new Chunk sharing name, array, node, reader hint, topic
            URI and addon_data with this one.
        """
        resolved = self.array.scheme.resolve(selection, self.array)
        return Chunk(
            name=self.name, array=self.array, node=self.node,
            manifest=self._manifest.subset(resolved),
            selection=selection, reader=self._reader,
            topic_uri=self._topic_uri, addon_data=self.addon_data,
        )

    def __getitem__(self, selection: Selection) -> "Chunk":
        """Subscript shorthand for select()."""
        return self.select(selection)

    def get_impl(self, impl: type, **kwargs) -> Any:
        """Materialise the chunk as ``impl``.

        :param impl: numpy.ndarray or xarray.DataArray.
        :param kwargs: accepted but not used by this implementation.
        :raises DrbChunkError: for any other ``impl``.
        """
        if impl is np.ndarray:
            return self._materialise_numpy()
        try:
            import xarray
        except ImportError:  # pragma: no cover
            xarray = None
        if xarray is not None and impl is xarray.DataArray:
            return self.to_xarray()
        raise DrbChunkError(
            f"Unsupported chunk impl {impl!r}. Use numpy.ndarray or "
            f"xarray.DataArray."
        )

    def _materialise_numpy(self):
        """Read the single tile of this chunk through its reader.

        :raises DrbChunkError: if the manifest does not hold exactly one
            tile.
        """
        keys = list(self._manifest.keys())
        if len(keys) != 1:
            raise DrbChunkError(
                f"Direct numpy materialisation needs a single tile; this "
                f"selection spans {len(keys)} tiles. Use to_xarray() (lazy "
                f"xarray) for multi-tile assembly."
            )
        ref = self._manifest.ref(keys[0])
        reader = select_reader(ref, self.array, hint=self._reader)
        resolved = self.array.scheme.resolve(self._selection, self.array) \
            if self._selection is not None else None
        return reader.read(ref, resolved)

    def to_xarray(self):
        """Dask-backed lazy xarray.DataArray assembled from the manifest.

        v1 handles exactly one tile; multi-tile mosaic is future work.

        :raises DrbChunkError: if the manifest is empty, holds more than
            one tile, or the tile carries no window.
        """
        import dask.array as da
        import xarray as xr

        keys = list(self._manifest.keys())
        if not keys:
            raise DrbChunkError("Empty chunk selection.")
        # Reject unsupported multi-tile selections up front instead of
        # resolving a reader and building a dask block for every tile first.
        if len(keys) > 1:
            raise DrbChunkError(
                "Multi-tile lazy assembly (concat/mosaic) is not yet "
                "implemented; select a single tile or one window per tile.")

        resolved = (
            self.array.scheme.resolve(self._selection, self.array)
            if self._selection is not None else None
        )
        key = keys[0]
        ref = self._manifest.ref(key)
        # The block shape is derived from the window, so a ref without one
        # (e.g. byte-range only) cannot be wrapped lazily.
        if ref.window is None:
            raise DrbChunkError(
                f"Tile {key} has no window; its block shape is unknown, so "
                f"it cannot be wrapped in a lazy xarray.")
        reader = select_reader(ref, self.array, hint=self._reader)
        window_shape = tuple(stop - start for (start, stop) in ref.window)
        data = da.from_delayed(
            _delayed_read(reader, ref, resolved),
            shape=_block_shape(ref, window_shape),
            dtype=self.array.dtype)
        return xr.DataArray(data, dims=("band",) + self.array.dims)

    def locator(self) -> dict:
        """Describe this chunk as a plain dict.

        Keys: "source" (the node's ``path`` as a string when it is truthy,
        otherwise the falsy value itself), "topic", "chunk" and "selection"
        (``selection.to_dict()`` or None).
        """
        path = getattr(self.node, "path", None)
        return {
            "source": str(path) if path else path,
            "topic": self._topic_uri,
            "chunk": self.name,
            "selection": self._selection.to_dict()
            if self._selection is not None else None,
        }

    @classmethod
    def from_locator(cls, data: dict) -> "Chunk":
        """Not implemented in v1; always raises DrbChunkError."""
        raise DrbChunkError(
            "locator round-trip (from_locator) is a v1 deferral; "
            "tracked as a follow-up."
        )
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _block_shape(ref: ChunkRef, window_shape) -> tuple:
    """Return the block shape for a window: a length-1 axis, then the window.

    ``ref`` is accepted but not consulted.
    """
    # rasterio read returns (bands, h, w); v1 assumes a leading band axis.
    return (1, *window_shape)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _delayed_read(reader, ref, resolved):
    """Wrap ``reader.read(ref, resolved)`` in a dask delayed call."""
    from dask import delayed

    lazy_read = delayed(reader.read)
    return lazy_read(ref, resolved)
|
drb/chunk/core.py
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
# drb/chunk/core.py
|
|
2
|
+
from typing import Any, List, Optional, Tuple, Union
|
|
3
|
+
|
|
4
|
+
from drb.addons.addon import Addon
|
|
5
|
+
from drb.core.node import DrbNode
|
|
6
|
+
from drb.topics.topic import DrbTopic
|
|
7
|
+
import drb.topics.resolver as resolver
|
|
8
|
+
|
|
9
|
+
from drb.chunk.chunk import Chunk
|
|
10
|
+
from drb.chunk.descriptor import ChunkDescriptor, retrieve_chunks
|
|
11
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
12
|
+
from drb.chunk.model import ChunkArray, RegularGridManifest
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ChunkAddon(Addon):
    """DRB add-on exposing the chunks declared by a topic as Chunk objects.

    The class is a singleton: every construction returns the same instance.
    """

    __instance = None

    def __new__(cls, *args, **kwargs):
        """Create the shared instance on first use, then keep returning it."""
        if cls.__instance is None:
            # Constructor arguments are accepted but not forwarded:
            # object.__new__ raises TypeError on extra arguments when
            # __new__ is overridden.
            # NOTE(review): assumes Addon defines no __new__ that needs
            # them -- confirm against drb.addons.addon.
            cls.__instance = super().__new__(cls)
        return cls.__instance

    @classmethod
    def identifier(cls) -> str:
        """Identifier of this add-on."""
        return "chunk"

    @classmethod
    def return_type(cls) -> type:
        """Type of the objects built by apply()."""
        return Chunk

    def can_apply(self, source: DrbTopic) -> bool:
        """Tell whether ``source`` declares (or inherits) any chunk."""
        return bool(retrieve_chunks(source))

    def available_chunks(self, source: DrbTopic) -> List[Tuple[str, dict]]:
        """List (chunk name, scheme as dict) for every chunk of ``source``."""
        return [(cd.name, cd.scheme.to_dict())
                for cd in retrieve_chunks(source).values()]

    def available_collections(self, source: DrbTopic) -> dict:
        """Map collection name -> list of chunk names declared for it.

        Chunks without a drb:collection are grouped under the key None.
        """
        result: dict = {}
        for cd in retrieve_chunks(source).values():
            result.setdefault(cd.collection, []).append(cd.name)
        return result

    def apply(self, node: DrbNode, *, chunk_name: Optional[str] = None,
              collection: Optional[str] = None,
              **kwargs) -> Union[Chunk, List[Chunk]]:
        """Build the chunk(s) declared for ``node``.

        :param node: product node the chunks are built from.
        :param chunk_name: build only the chunk with this name.
        :param collection: build only the chunks of this collection.
        :param kwargs: ``topic`` may carry the topic to use; otherwise the
            first element of ``resolver.resolve(node)`` is used.
        :return: one Chunk when ``chunk_name`` is given, otherwise a list.
        :raises DrbChunkError: if the topic declares no chunk, both filters
            are given, or the requested chunk/collection does not exist.
        """
        topic = kwargs.get("topic") or resolver.resolve(node)[0]
        descriptors = retrieve_chunks(topic)
        if not descriptors:
            raise DrbChunkError(
                f"Topic '{getattr(topic, 'label', topic)}' declares no "
                f"drb:chunk.")
        if chunk_name is not None and collection is not None:
            raise DrbChunkError(
                "Pass either chunk_name or collection, not both.")
        if collection is not None:
            selected = [cd for cd in descriptors.values()
                        if cd.collection == collection]
            if not selected:
                available = sorted({cd.collection
                                    for cd in descriptors.values()
                                    if cd.collection is not None})
                raise DrbChunkError(
                    f"No collection '{collection}'. "
                    f"Available: {', '.join(available)}.")
            return [self._build(node, topic, cd) for cd in selected]
        if chunk_name is not None:
            if chunk_name not in descriptors:
                raise DrbChunkError(
                    f"No chunk '{chunk_name}' in topic "
                    f"'{getattr(topic, 'label', topic)}'. "
                    f"Available: {', '.join(sorted(descriptors))}."
                )
            return self._build(node, topic, descriptors[chunk_name])
        return [self._build(node, topic, cd) for cd in descriptors.values()]

    def _build(self, node: DrbNode, topic: DrbTopic,
               cd: ChunkDescriptor) -> Chunk:
        """Turn one descriptor into a Chunk bound to its source node.

        :raises DrbChunkError: if the descriptor carries a default
            selection (v1 deferral) or its source extractor yields None.
        """
        if cd.selection is not None:
            raise DrbChunkError(
                f"Chunk '{cd.name}': descriptor-level default "
                f"drb:selection is a v1 deferral; apply selections "
                f"explicitly via Chunk.select()."
            )
        source = cd.source.extract(node)
        if source is None:
            raise DrbChunkError(
                f"Chunk '{cd.name}': no node matched its source "
                f"(drb:source) in the product.")
        source_node = self._resolve_source(node, source)
        array = ChunkArray(dims=cd.dims,
                           shape=self._infer_shape(source_node, cd),
                           dtype=cd.dtype, scheme=cd.scheme)
        manifest = RegularGridManifest(array, source_node)
        return Chunk(name=cd.name, array=array, node=source_node,
                     manifest=manifest, reader=cd.reader,
                     topic_uri=getattr(topic, "uri", None))

    @staticmethod
    def _resolve_source(node: DrbNode, source: Any) -> DrbNode:
        """Map an extracted source value to the node to read from.

        A DrbNode is returned as is; ".", "" or None mean ``node`` itself;
        anything else is converted to ``str`` and resolved through
        ``resolver.create``.
        """
        if isinstance(source, DrbNode):
            return source
        if source in (".", "", None):
            return node
        # A path/URL string -> resolve it against DRB.
        return resolver.create(str(source))

    @staticmethod
    def _infer_shape(
            source_node: DrbNode, cd: ChunkDescriptor) -> Tuple[int, ...]:
        """Read the array shape from the source's existing rasterio impl.

        No driver change: uses ``get_impl(DatasetReader)`` and returns
        ``(height, width)``.

        :raises DrbChunkError: wrapping any failure, including a missing
            rasterio installation.
        """
        try:
            from rasterio.io import DatasetReader
            ds = source_node.get_impl(DatasetReader)
            return (ds.height, ds.width)
        except Exception as exc:  # noqa: BLE001
            raise DrbChunkError(
                f"Cannot infer shape for chunk '{cd.name}': {exc}") from exc
|
drb/chunk/descriptor.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# drb/chunk/descriptor.py
|
|
2
|
+
from dataclasses import dataclass, field
|
|
3
|
+
from typing import Dict, List, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
import rdflib
|
|
6
|
+
from rdflib.collection import Collection
|
|
7
|
+
|
|
8
|
+
from drb.extractor import ConstantExtractor, Extractor, parse_extractor
|
|
9
|
+
from drb.exceptions.core import DrbException
|
|
10
|
+
from drb.topics.dao import ManagerDao
|
|
11
|
+
from drb.topics.topic import DrbTopic
|
|
12
|
+
|
|
13
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
14
|
+
from drb.chunk.selection import Selection, parse_selection
|
|
15
|
+
from drb.chunk.tiling import RegularGrid, TilingScheme
|
|
16
|
+
|
|
17
|
+
DRB = rdflib.Namespace("http://www.gael.fr/drb#")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class ChunkDescriptor:
    """Declaration of one drb:chunk as loaded from a topic's RDF graph."""

    # Chunk name (drb:chunkName); retrieve_chunks() keys its result on it.
    name: str
    # Extractor applied to the product node to locate the chunk source.
    source: Extractor
    # Dimension names (drb:dims); empty tuple when none are declared.
    dims: Tuple[str, ...]
    # Element type name (drb:dtype); the loader stores None when the topic
    # does not declare one, hence Optional.
    dtype: Optional[str]
    # Tiling scheme built from the chunk's tiling predicates.
    scheme: TilingScheme
    # Descriptor-level default selection (drb:selection), if any.
    selection: Optional[Selection] = None
    # Reader hint (drb:reader), if any.
    reader: Optional[str] = None
    # Collection name (drb:collection), if any.
    collection: Optional[str] = None
    # Free-form auxiliary data; not populated by the loader in this module.
    aux_data: dict = field(default_factory=dict)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _rdf_list(graph: rdflib.Graph, node) -> list:
    """Return the members of the RDF collection at ``node`` as strings."""
    members = Collection(graph, node)
    return list(map(str, members))
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _scheme_from_chunk(graph: rdflib.Graph, chunk) -> TilingScheme:
    """Build the tiling scheme declared on one drb:chunk node.

    Only the 'regular' scheme is supported, and it is the default when
    drb:tilingScheme is absent. The chunk shape comes from drb:chunkShape
    when present, otherwise from drb:tileHeight and drb:tileWidth.

    :raises DrbChunkError: for any other scheme name, or when neither
        shape declaration is complete.
    """
    declared = graph.value(chunk, DRB.tilingScheme)
    scheme_name = "regular" if declared is None else str(declared)
    if scheme_name != "regular":
        raise DrbChunkError(
            f"Unsupported tilingScheme '{scheme_name}'. v1 supports 'regular'."
        )

    shape_node = graph.value(chunk, DRB.chunkShape)
    if shape_node is not None:
        extents = [int(item) for item in _rdf_list(graph, shape_node)]
        return RegularGrid(chunk_shape=tuple(extents))

    height = graph.value(chunk, DRB.tileHeight)
    width = graph.value(chunk, DRB.tileWidth)
    if height is None or width is None:
        raise DrbChunkError(
            "RegularGrid needs drb:chunkShape or drb:tileWidth+tileHeight."
        )
    # dims are (y, x), so height comes first.
    return RegularGrid(chunk_shape=(int(height), int(width)))
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Extractor kinds accepted on a typed drb:source blank node. Each entry pairs
# the key handed to drb.extractor.parse_extractor with the DRB predicate of
# the same local name; the order is the lookup order.
_SOURCE_KEYS = tuple(
    (kind, DRB[kind])
    for kind in ("xquery", "python", "script", "constant")
)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _source_extractor(graph: rdflib.Graph, chunk) -> Extractor:
    """Build the source Extractor for one drb:chunk.

    A plain literal becomes a ConstantExtractor (v1: "." = the node itself,
    or a path/URL resolved by the add-on). Any other value is treated as a
    typed node such as ``drb:source [ drb:xquery "…" ]`` (or
    drb:python/script/constant): the first predicate of _SOURCE_KEYS found
    on it is dispatched through drb.extractor.parse_extractor.

    :raises DrbChunkError: if drb:source is missing, or the typed node
        carries none of the supported predicates.
    """
    source = graph.value(chunk, DRB.source)
    if source is None:
        raise DrbChunkError("drb:chunk: missing drb:source.")
    if isinstance(source, rdflib.Literal):
        return ConstantExtractor(str(source))

    # Lazy scan: predicates are looked up one at a time, stopping at the
    # first one that is present.
    candidates = (
        (key, graph.value(source, predicate))
        for key, predicate in _SOURCE_KEYS
    )
    found = next(
        ((key, obj) for key, obj in candidates if obj is not None), None)
    if found is None:
        raise DrbChunkError(
            "drb:source blank node must carry one of drb:xquery, drb:python, "
            "drb:script or drb:constant."
        )
    key, obj = found
    return parse_extractor({key: str(obj)})
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _load_chunks_from_graph(uri: str,
                            graph: rdflib.Graph) -> List[ChunkDescriptor]:
    """Read every drb:chunk attached to the topic ``uri`` in ``graph``.

    :param uri: URI of the topic whose chunks are loaded.
    :param graph: RDF graph holding the topic definition.
    :return: one ChunkDescriptor per drb:chunk object, in graph order.
    :raises DrbChunkError: if a chunk has no drb:chunkName, or if its
        source or tiling scheme cannot be built.
    """
    subject = rdflib.URIRef(uri)
    descriptors: List[ChunkDescriptor] = []
    for chunk in graph.objects(subject, DRB.chunk):
        name = graph.value(chunk, DRB.chunkName)
        if name is None:
            # str(None) would silently register a chunk named "None".
            raise DrbChunkError("drb:chunk: missing drb:chunkName.")
        dims_node = graph.value(chunk, DRB.dims)
        dtype = graph.value(chunk, DRB.dtype)
        reader = graph.value(chunk, DRB.reader)
        collection_node = graph.value(chunk, DRB.collection)
        sel_node = graph.value(chunk, DRB.selection)
        descriptors.append(ChunkDescriptor(
            name=str(name),
            source=_source_extractor(graph, chunk),
            dims=tuple(_rdf_list(graph, dims_node)) if dims_node else (),
            dtype=str(dtype) if dtype is not None else None,
            scheme=_scheme_from_chunk(graph, chunk),
            selection=_selection_from_node(graph, sel_node),
            reader=str(reader) if reader is not None else None,
            collection=(str(collection_node)
                        if collection_node is not None
                        else None),
        ))
    return descriptors
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _selection_from_node(graph: rdflib.Graph, node) -> Optional[Selection]:
    """Build the default selection declared on a drb:selection node.

    Only a single drb:band value is understood; it is parsed as an int and
    wrapped as ``{"band": [value]}``. Returns None when ``node`` is None or
    carries no drb:band.
    """
    if node is not None:
        band = graph.value(node, DRB.band)
        if band is not None:
            return parse_selection({"band": [int(band)]})
    return None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def retrieve_chunks(topic: DrbTopic) -> Dict[str, ChunkDescriptor]:
    """Collect the chunk descriptors that apply to ``topic``, keyed by name.

    Mirrors MetadataAddon._retrieve_metadata: descriptors are first
    inherited from every parent listed in ``subClassOf`` (recursively), then
    overridden by the topic's own chunks read from the public dao.graph.
    A topic without a URI, without a DAO (DrbException on lookup) or whose
    DAO has no ``graph`` contributes nothing of its own.
    """
    merged: Dict[str, ChunkDescriptor] = {}

    parents = topic.subClassOf
    if parents is not None:
        for parent_id in parents:
            parent_topic = ManagerDao().get_drb_topic(parent_id)
            merged.update(retrieve_chunks(parent_topic))

    if topic.uri is None:
        return merged

    try:
        dao = ManagerDao().get_dao(topic.id)
    except DrbException:
        dao = None
    graph = getattr(dao, "graph", None)
    if graph is None:
        return merged

    # The topic's own declarations win over inherited ones of the same name.
    own = _load_chunks_from_graph(topic.uri, graph)
    merged.update((descriptor.name, descriptor) for descriptor in own)
    return merged
|
drb/chunk/exceptions.py
ADDED
drb/chunk/interop.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from drb.chunk.chunk import Chunk
|
|
2
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def to_kerchunk(chunk: Chunk) -> dict:
    """Export the chunk's manifest as a kerchunk reference-spec v1 dict.

    kerchunk addresses data by byte range inside a source file, so every
    tile must carry a ChunkRef.byte_range and a source exposing ``path``.
    Each tile becomes ``"k0.k1…": [path, offset, length]`` under "refs".

    :raises DrbChunkError: if a tile has no byte range (window-only,
        format-native chunks) or its source has no ``path``.
    """
    manifest = chunk.manifest
    references = {}
    for tile_key in manifest.keys():
        tile = manifest.ref(tile_key)
        byte_range = tile.byte_range
        if byte_range is None:
            raise DrbChunkError(
                "to_kerchunk requires byte-range chunks; this chunk uses "
                "format-native windows. Export is not supported."
            )
        offset, length = byte_range
        path = getattr(tile.source, "path", None)
        if path is None:
            raise DrbChunkError(
                f"Chunk source for key {tile_key} has no 'path'; cannot export a "
                f"kerchunk reference."
            )
        references[".".join(map(str, tile_key))] = [str(path), offset, length]
    return {"version": 1, "refs": references}
|
drb/chunk/model.py
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import abc
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from typing import Iterable, Optional, Tuple
|
|
4
|
+
|
|
5
|
+
from drb.core.node import DrbNode
|
|
6
|
+
|
|
7
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
8
|
+
from drb.chunk.selection import ResolvedSelection
|
|
9
|
+
from drb.chunk.tiling import RegularGrid, TilingScheme
|
|
10
|
+
|
|
11
|
+
Window = Tuple[Tuple[int, int], ...]
|
|
12
|
+
"""per-dim (start, stop) — geometry-agnostic"""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class ChunkArray:
    """Description of a chunked array: dims, shape, dtype and tiling."""

    # Dimension names, one per axis.
    dims: Tuple[str, ...]
    # Array extent along each axis.
    shape: Tuple[int, ...]
    # Element type name.
    dtype: str
    # Tiling scheme that splits the array into chunks.
    scheme: TilingScheme
    # Optional coordinates and attributes; not interpreted in this module.
    coords: Optional[dict] = None
    attrs: Optional[dict] = None

    @property
    def grid_shape(self) -> Tuple[int, ...]:
        """Grid shape computed by the RegularGrid scheme for this array.

        :raises DrbChunkError: if the scheme is not a RegularGrid.
        """
        scheme = self.scheme
        if not isinstance(scheme, RegularGrid):
            raise DrbChunkError(
                f"grid_shape is only defined for RegularGrid, not "
                f"{type(self.scheme).__name__}."
            )
        return scheme.grid_shape(self)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass
class ChunkRef:
    """Reference to one chunk: where it lives and how to address it."""

    # Tile key of the chunk within its manifest.
    key: Tuple[int, ...]
    # Node the chunk is read from.
    source: DrbNode
    # Byte addressing, consumed elsewhere in this package as
    # (offset, length); None when the chunk is not byte-addressed.
    byte_range: Optional[Tuple[int, int]] = None
    # Per-dimension (start, stop) window; None when not window-addressed.
    window: Optional[Window] = None
    # Optional codec description; not interpreted in this module.
    codec: Optional[dict] = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ChunkManifest(abc.ABC):
    """Lazy Mapping[key -> ChunkRef] over the scheme's keys."""

    @abc.abstractmethod
    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the ChunkRef for ``key``."""

    @abc.abstractmethod
    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return the tile keys covered by this manifest."""

    def subset(self, resolved: ResolvedSelection) -> "ChunkManifest":
        """Return a manifest restricted to the keys of ``resolved``.

        The keys are copied into a list; refs are still served by this
        manifest.
        """
        selected_keys = list(resolved.keys)
        return _SubsetManifest(self, selected_keys)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class RegularGridManifest(ChunkManifest):
    """Manifest over a RegularGrid; windows derived from key * chunk_shape."""

    def __init__(self, array: ChunkArray, source: DrbNode):
        """Bind the manifest to ``array`` and the node its tiles come from.

        :raises DrbChunkError: if ``array.scheme`` is not a RegularGrid.
        """
        if not isinstance(array.scheme, RegularGrid):
            raise DrbChunkError("RegularGridManifest requires a RegularGrid.")
        self._array = array
        self._source = source
        self._chunk_shape = array.scheme.chunk_shape

    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the window-addressed ChunkRef for tile ``key``.

        Along each axis the window is ``(k * c, min((k + 1) * c, s))`` for
        tile index ``k``, chunk size ``c`` and array size ``s``, so edge
        tiles are clipped to the array.

        :raises DrbChunkError: if a tile index is negative or starts at or
            beyond the array extent (the window would have negative or
            inverted bounds).
        """
        # NOTE(review): zip() stops at the shortest of key, chunk_shape and
        # array shape, so a rank mismatch is silently truncated -- confirm
        # whether that is intended before tightening it.
        axes = zip(key, self._chunk_shape, self._array.shape)
        window = []
        for axis, (index, extent, size) in enumerate(axes):
            start = index * extent
            if index < 0 or start >= size:
                raise DrbChunkError(
                    f"Tile key {key} is outside the grid on axis {axis}: "
                    f"index {index} with chunk size {extent} and array "
                    f"size {size}.")
            window.append((start, min(start + extent, size)))
        return ChunkRef(key=key, source=self._source, window=tuple(window))

    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return the tile keys the scheme enumerates for the array."""
        return self._array.scheme.keys(self._array)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class _SubsetManifest(ChunkManifest):
    """View of a parent manifest limited to an explicit collection of keys."""

    def __init__(self, parent: ChunkManifest, keys):
        """Remember the parent manifest and the keys this view exposes."""
        self._parent, self._keys = parent, keys

    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Delegate to the parent manifest; ``key`` is not checked against
        the subset."""
        parent = self._parent
        return parent.ref(key)

    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return a fresh iterator over the stored keys."""
        stored_keys = self._keys
        return iter(stored_keys)
|
drb/chunk/readers.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# drb/chunk/readers.py
|
|
2
|
+
import abc
|
|
3
|
+
import io
|
|
4
|
+
from typing import Any, Optional
|
|
5
|
+
|
|
6
|
+
from drb.chunk.exceptions import DrbChunkError
|
|
7
|
+
from drb.chunk.model import ChunkArray, ChunkRef
|
|
8
|
+
from drb.chunk.selection import ResolvedSelection
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ReaderStrategy(abc.ABC):
    """Materialises one chunk using source node's existing get_impl."""

    @abc.abstractmethod
    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """Tell whether this strategy can read the chunk behind ``ref``."""

    @abc.abstractmethod
    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Read and return the data of the chunk behind ``ref``."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class RasterWindowReader(ReaderStrategy):
    """Windowed rasterio read via the source's EXISTING get_impl. The driver
    (impl/image) is not modified."""

    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """A ref is readable here as soon as it carries a window."""
        has_window = ref.window is not None
        return has_window

    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Read ``ref.window`` from the source's rasterio dataset.

        ``resolved`` is accepted but not used.
        """
        from rasterio.io import DatasetReader

        # Pass window as tuple-of-ranges; rasterio normalises it internally.
        return ref.source.get_impl(DatasetReader).read(window=ref.window)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ByteRangeReader(ReaderStrategy):
    """Reads ref.byte_range via the source driver's partial GET."""

    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """A ref is readable here as soon as it carries a byte range."""
        return ref.byte_range is not None

    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Return ``length`` bytes starting at ``offset`` in the source.

        ``ref.byte_range`` is unpacked as ``(offset, length)``; ``resolved``
        is accepted but not used. The stream obtained from the source is
        closed before returning, whether or not the read succeeds.
        """
        offset, length = ref.byte_range
        stream = ref.source.get_impl(io.BytesIO)
        # NOTE(review): assumes each get_impl call returns a stream owned by
        # the caller -- confirm against the DRB driver in use.
        try:
            stream.seek(offset)
            return stream.read(length)
        finally:
            stream.close()
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# Shared reader instances, probed in this order when no hint is given.
_STRATEGIES = (
    RasterWindowReader(),
    ByteRangeReader(),
)
# Hint name -> reader class, for explicit selection through select_reader().
_BY_NAME = {
    "raster": RasterWindowReader,
    "range": ByteRangeReader,
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def select_reader(ref: ChunkRef, array: ChunkArray,
                  hint: Optional[str] = None) -> ReaderStrategy:
    """Pick the reader strategy used to materialise ``ref``.

    Without a hint, the first strategy of _STRATEGIES whose can_read()
    accepts the ref is returned. With a hint, a new instance of the reader
    registered under that name in _BY_NAME is returned, provided it can
    actually read the ref.

    :param ref: chunk reference to read.
    :param array: array the chunk belongs to.
    :param hint: optional reader name, a key of _BY_NAME.
    :raises DrbChunkError: if the hint is unknown, the hinted reader cannot
        read the ref, or no strategy applies.
    """
    if hint is None:
        for strategy in _STRATEGIES:
            if strategy.can_read(ref, array):
                return strategy
        raise DrbChunkError(
            "No reader strategy applies to this chunk (no window, no byte_range). "
            "Refusing to fall back to a full read."
        )

    # Keep the try narrow: only the lookup may raise the KeyError we map.
    try:
        reader_cls = _BY_NAME[hint]
    except KeyError as exc:
        raise DrbChunkError(
            f"Unknown reader hint '{hint}'. "
            f"Available: {', '.join(sorted(_BY_NAME))}."
        ) from exc

    reader = reader_cls()
    # A hint must not bypass the applicability check: a window reader on a
    # window-less ref would read everything, and a byte-range reader on a
    # range-less ref would fail while unpacking None.
    if not reader.can_read(ref, array):
        raise DrbChunkError(
            f"Reader hint '{hint}' cannot read this chunk (it lacks the "
            f"window or byte_range that reader needs). Refusing to fall "
            f"back to a full read."
        )
    return reader
|