drb-chunk 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
drb/chunk/__init__.py ADDED
@@ -0,0 +1,16 @@
1
+ from . import _version
2
+ from drb.chunk.exceptions import DrbChunkError, DrbSelectionError
3
+ from drb.chunk.selection import Selection, parse_selection
4
+ from drb.chunk.tiling import TilingScheme, RegularGrid
5
+ from drb.chunk.model import ChunkArray, ChunkRef, ChunkManifest
6
+ from drb.chunk.chunk import Chunk
7
+ from drb.chunk.core import ChunkAddon
8
+ from drb.chunk.interop import to_kerchunk
9
+
10
+ __version__ = _version.get_versions()["version"]
11
+
12
+ __all__ = [
13
+ "Chunk", "ChunkAddon", "ChunkArray", "ChunkRef", "ChunkManifest",
14
+ "Selection", "parse_selection", "TilingScheme", "RegularGrid",
15
+ "to_kerchunk", "DrbChunkError", "DrbSelectionError", "__version__",
16
+ ]
drb/chunk/_version.py ADDED
@@ -0,0 +1,21 @@
1
+
2
+ # This file was generated by 'versioneer.py' (0.29) from
3
+ # revision-control system data, or from the parent directory name of an
4
+ # unpacked source archive. Distribution tarballs contain a pre-generated copy
5
+ # of this file.
6
+
7
+ import json
8
+
9
+ version_json = '''
10
+ {
11
+ "date": "2026-06-30T04:50:46+0000",
12
+ "dirty": false,
13
+ "error": null,
14
+ "full-revisionid": "c2cf2e26834f78f38f0a6a5162142494c001af69",
15
+ "version": "0.2.0"
16
+ }
17
+ ''' # END VERSION_JSON
18
+
19
+
20
def get_versions():
    """Return the version metadata embedded in this module.

    The module-level ``version_json`` string is decoded on every call and
    the resulting mapping is returned (it carries the "version",
    "full-revisionid", "dirty", "error" and "date" entries).
    """
    metadata = json.loads(version_json)
    return metadata
drb/chunk/chunk.py ADDED
@@ -0,0 +1,140 @@
1
+ # drb/chunk/chunk.py
2
+ from typing import Any, Iterable, Optional, Tuple
3
+
4
+ import numpy as np
5
+
6
+ from drb.core.node import DrbNode
7
+
8
+ from drb.chunk.exceptions import DrbChunkError
9
+ from drb.chunk.model import ChunkArray, ChunkRef, ChunkManifest
10
+ from drb.chunk.readers import select_reader
11
+ from drb.chunk.selection import Selection
12
+
13
+
14
class Chunk:
    """Lazy handle on one named chunked array exposed by a DRB node.

    A Chunk bundles the array description, the source node and a manifest
    mapping tile keys to ChunkRef objects. Narrowing with :meth:`select`
    (or ``chunk[selection]``) only subsets the manifest; no data is read
    until a materialisation method is called.
    """

    def __init__(
            self, name: str, array: ChunkArray, node: DrbNode,
            manifest: ChunkManifest, *, selection: Optional[Selection] = None,
            reader: Optional[str] = None, topic_uri: Optional[str] = None,
            addon_data: Optional[dict] = None):
        """Store the chunk description.

        :param name: chunk name.
        :param array: description of the chunked array (dims, shape, scheme).
        :param node: node the tiles are read from.
        :param manifest: mapping of tile keys to ChunkRef.
        :param selection: selection that produced this chunk, if any.
        :param reader: optional reader hint forwarded to select_reader.
        :param topic_uri: URI of the topic declaring the chunk, if known.
        :param addon_data: free-form extra data; defaults to an empty dict.
        """
        self.name = name
        self.array = array
        self.node = node
        self._manifest = manifest
        self._selection = selection
        self._reader = reader
        self._topic_uri = topic_uri
        self.addon_data = addon_data or {}

    @property
    def grid_shape(self) -> Tuple[int, ...]:
        """Grid shape reported by the underlying ChunkArray."""
        return self.array.grid_shape

    @property
    def manifest(self) -> ChunkManifest:
        """Manifest currently backing this chunk."""
        return self._manifest

    def tiles(self) -> Iterable[Tuple[int, ...]]:
        """Return the tile keys of the current manifest."""
        return self._manifest.keys()

    def tile(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the ChunkRef the manifest holds for ``key``."""
        return self._manifest.ref(key)

    def select(self, selection: Selection) -> "Chunk":
        """Lazy narrowing: resolve keys via the scheme, subset the manifest,
        read nothing.

        :returns: a new Chunk sharing this chunk's array, node, reader
            hint, topic URI and addon_data.
        """
        resolved = self.array.scheme.resolve(selection, self.array)
        return Chunk(
            name=self.name, array=self.array, node=self.node,
            manifest=self._manifest.subset(resolved),
            selection=selection, reader=self._reader,
            topic_uri=self._topic_uri, addon_data=self.addon_data,
        )

    def __getitem__(self, selection: Selection) -> "Chunk":
        """Indexing shortcut for :meth:`select`."""
        return self.select(selection)

    def get_impl(self, impl: type, **kwargs) -> Any:
        """Materialise the chunk as ``impl``.

        :param impl: ``numpy.ndarray`` or ``xarray.DataArray``.
        :raises DrbChunkError: for any other implementation type.
        """
        if impl is np.ndarray:
            return self._materialise_numpy()
        # xarray is optional: only compare against it when it is importable.
        try:
            import xarray
        except ImportError:  # pragma: no cover
            xarray = None
        if xarray is not None and impl is xarray.DataArray:
            return self.to_xarray()
        raise DrbChunkError(
            f"Unsupported chunk impl {impl!r}. Use numpy.ndarray or "
            f"xarray.DataArray."
        )

    def _materialise_numpy(self):
        """Read the single tile of this chunk through the selected reader.

        :raises DrbChunkError: when the manifest does not hold exactly one
            tile.
        """
        keys = list(self._manifest.keys())
        if len(keys) != 1:
            raise DrbChunkError(
                f"Direct numpy materialisation needs a single tile; this "
                f"selection spans {len(keys)} tiles. Use to_xarray() (lazy "
                f"xarray) for multi-tile assembly."
            )
        ref = self._manifest.ref(keys[0])
        reader = select_reader(ref, self.array, hint=self._reader)
        resolved = (
            self.array.scheme.resolve(self._selection, self.array)
            if self._selection is not None else None
        )
        return reader.read(ref, resolved)

    def to_xarray(self):
        """Dask-backed lazy xarray.DataArray assembled from the manifest.

        :raises DrbChunkError: when the manifest is empty, spans more than
            one tile (mosaic assembly is not implemented), or the tile
            carries no window to derive the block shape from.
        """
        import dask.array as da
        import xarray as xr

        resolved = (
            self.array.scheme.resolve(self._selection, self.array)
            if self._selection is not None else None
        )
        keys = list(self._manifest.keys())
        if not keys:
            raise DrbChunkError("Empty chunk selection.")
        # v1: single-tile fast path; multi-tile mosaic is future work.
        # Reject up front instead of building delayed blocks that would be
        # thrown away.
        if len(keys) > 1:
            raise DrbChunkError(
                "Multi-tile lazy assembly (concat/mosaic) is not yet "
                "implemented; select a single tile or one window per tile.")
        key = keys[0]
        ref = self._manifest.ref(key)
        # The block shape is derived from the window; a window-less ref
        # (e.g. byte-range only) would otherwise fail with a bare TypeError.
        if ref.window is None:
            raise DrbChunkError(
                f"Chunk '{self.name}': tile {key} has no window; "
                f"to_xarray() needs window-addressed tiles.")
        reader = select_reader(ref, self.array, hint=self._reader)
        shape = tuple(stop - start for (start, stop) in ref.window)
        data = da.from_delayed(
            _delayed_read(reader, ref, resolved),
            shape=_block_shape(ref, shape),
            dtype=self.array.dtype)
        return xr.DataArray(data, dims=("band", *self.array.dims))

    def locator(self) -> dict:
        """Describe this chunk as a plain dict.

        Keys are "source" (string form of ``node.path`` or None), "topic",
        "chunk" and "selection" (``selection.to_dict()`` or None).
        """
        path = getattr(self.node, "path", None)
        return {
            # Always a string or None, never a falsy path object.
            "source": str(path) if path is not None else None,
            "topic": self._topic_uri,
            "chunk": self.name,
            "selection": self._selection.to_dict()
            if self._selection is not None else None,
        }

    @classmethod
    def from_locator(cls, data: dict) -> "Chunk":
        """Not implemented in v1; always raises DrbChunkError."""
        raise DrbChunkError(
            "locator round-trip (from_locator) is a v1 deferral; "
            "tracked as a follow-up."
        )
131
+
132
+
133
def _block_shape(ref: "ChunkRef", window_shape) -> tuple:
    """Return the array shape expected for one read block.

    ``ref`` is accepted for signature symmetry but not consulted; the
    result is ``window_shape`` with a single leading axis of length 1.
    """
    # rasterio read returns (bands, h, w); v1 assumes a leading band axis.
    return (1, *window_shape)
136
+
137
+
138
def _delayed_read(reader, ref, resolved):
    """Wrap ``reader.read(ref, resolved)`` in a dask delayed call.

    dask is imported here so that it is only required when lazy reads
    are actually requested.
    """
    from dask import delayed

    lazy_read = delayed(reader.read)
    return lazy_read(ref, resolved)
drb/chunk/core.py ADDED
@@ -0,0 +1,123 @@
1
+ # drb/chunk/core.py
2
+ from typing import Any, List, Optional, Tuple, Union
3
+
4
+ from drb.addons.addon import Addon
5
+ from drb.core.node import DrbNode
6
+ from drb.topics.topic import DrbTopic
7
+ import drb.topics.resolver as resolver
8
+
9
+ from drb.chunk.chunk import Chunk
10
+ from drb.chunk.descriptor import ChunkDescriptor, retrieve_chunks
11
+ from drb.chunk.exceptions import DrbChunkError
12
+ from drb.chunk.model import ChunkArray, RegularGridManifest
13
+
14
+
15
class ChunkAddon(Addon):
    """Add-on turning the drb:chunk declarations of a topic into Chunk
    objects.

    Instances are shared: ``__new__`` hands back the same object on every
    instantiation.
    """

    __instance = None

    def __new__(cls, *args, **kwargs):
        existing = cls.__instance
        if existing is None:
            existing = super().__new__(cls, *args, **kwargs)
            cls.__instance = existing
        return existing

    @classmethod
    def identifier(cls) -> str:
        """Identifier of this add-on."""
        return "chunk"

    @classmethod
    def return_type(cls) -> type:
        """Type produced by :meth:`apply` for a single chunk."""
        return Chunk

    def can_apply(self, source: DrbTopic) -> bool:
        """Tell whether ``source`` declares (or inherits) any chunk."""
        declared = retrieve_chunks(source)
        return bool(declared)

    def available_chunks(self, source: DrbTopic) -> List[Tuple[str, dict]]:
        """List ``(chunk name, scheme as dict)`` for every chunk of
        ``source``."""
        listing = []
        for descriptor in retrieve_chunks(source).values():
            listing.append((descriptor.name, descriptor.scheme.to_dict()))
        return listing

    def available_collections(self, source: DrbTopic) -> dict:
        """Map collection name -> list of chunk names declared for it.

        Chunks without a drb:collection are grouped under the key None.
        """
        grouped: dict = {}
        for descriptor in retrieve_chunks(source).values():
            bucket = grouped.setdefault(descriptor.collection, [])
            bucket.append(descriptor.name)
        return grouped

    def apply(self, node: DrbNode, *, chunk_name: Optional[str] = None,
              collection: Optional[str] = None,
              **kwargs) -> Union[Chunk, List[Chunk]]:
        """Build the chunk(s) of ``node``.

        :param node: product node the chunks are built for.
        :param chunk_name: build only this chunk and return it directly.
        :param collection: build every chunk of this collection (list).
        :param kwargs: may carry ``topic``; otherwise the topic is resolved
            from ``node``.
        :returns: one Chunk when ``chunk_name`` is given, else a list.
        :raises DrbChunkError: when the topic declares no chunk, both
            filters are given, or the requested name/collection is unknown.
        """
        topic = kwargs.get("topic") or resolver.resolve(node)[0]
        descriptors = retrieve_chunks(topic)
        if not descriptors:
            raise DrbChunkError(
                f"Topic '{getattr(topic, 'label', topic)}' declares no "
                f"drb:chunk.")
        if chunk_name is not None and collection is not None:
            raise DrbChunkError(
                "Pass either chunk_name or collection, not both.")

        if collection is not None:
            members = [cd for cd in descriptors.values()
                       if cd.collection == collection]
            if members:
                return [self._build(node, topic, cd) for cd in members]
            known = sorted({cd.collection
                            for cd in descriptors.values()
                            if cd.collection is not None})
            raise DrbChunkError(
                f"No collection '{collection}'. "
                f"Available: {', '.join(known)}.")

        if chunk_name is None:
            # No filter: every declared chunk, in declaration order.
            return [self._build(node, topic, cd)
                    for cd in descriptors.values()]

        if chunk_name in descriptors:
            return self._build(node, topic, descriptors[chunk_name])
        raise DrbChunkError(
            f"No chunk '{chunk_name}' in topic "
            f"'{getattr(topic, 'label', topic)}'. "
            f"Available: {', '.join(sorted(descriptors))}."
        )

    def _build(self, node: DrbNode, topic: DrbTopic,
               cd: ChunkDescriptor) -> Chunk:
        """Instantiate the Chunk described by ``cd`` for ``node``.

        :raises DrbChunkError: when the descriptor carries a default
            selection (deferred in v1) or its source extractor yields None.
        """
        if cd.selection is not None:
            raise DrbChunkError(
                f"Chunk '{cd.name}': descriptor-level default "
                f"drb:selection is a v1 deferral; apply selections "
                f"explicitly via Chunk.select()."
            )
        extracted = cd.source.extract(node)
        if extracted is None:
            raise DrbChunkError(
                f"Chunk '{cd.name}': no node matched its source "
                f"(drb:source) in the product.")
        source_node = self._resolve_source(node, extracted)
        inferred_shape = self._infer_shape(source_node, cd)
        array = ChunkArray(dims=cd.dims, shape=inferred_shape,
                           dtype=cd.dtype, scheme=cd.scheme)
        return Chunk(name=cd.name, array=array, node=source_node,
                     manifest=RegularGridManifest(array, source_node),
                     reader=cd.reader,
                     topic_uri=getattr(topic, "uri", None))

    @staticmethod
    def _resolve_source(node: DrbNode, source: Any) -> DrbNode:
        """Turn an extracted source value into the node to read from."""
        if not isinstance(source, DrbNode):
            if source in (".", "", None):
                # "." / empty means the product node itself.
                return node
            # A path/URL string -> resolve it against DRB.
            return resolver.create(str(source))
        return source

    @staticmethod
    def _infer_shape(
            source_node: DrbNode, cd: ChunkDescriptor) -> Tuple[int, ...]:
        """Read the (height, width) of the source through its rasterio
        DatasetReader implementation.

        :raises DrbChunkError: wrapping any failure (including a missing
            rasterio) with the chunk name.
        """
        try:
            from rasterio.io import DatasetReader
            dataset = source_node.get_impl(DatasetReader)
            height, width = dataset.height, dataset.width
        except Exception as exc:  # noqa: BLE001
            raise DrbChunkError(
                f"Cannot infer shape for chunk '{cd.name}': {exc}") from exc
        return (height, width)
@@ -0,0 +1,144 @@
1
+ # drb/chunk/descriptor.py
2
+ from dataclasses import dataclass, field
3
+ from typing import Dict, List, Optional, Tuple
4
+
5
+ import rdflib
6
+ from rdflib.collection import Collection
7
+
8
+ from drb.extractor import ConstantExtractor, Extractor, parse_extractor
9
+ from drb.exceptions.core import DrbException
10
+ from drb.topics.dao import ManagerDao
11
+ from drb.topics.topic import DrbTopic
12
+
13
+ from drb.chunk.exceptions import DrbChunkError
14
+ from drb.chunk.selection import Selection, parse_selection
15
+ from drb.chunk.tiling import RegularGrid, TilingScheme
16
+
17
+ DRB = rdflib.Namespace("http://www.gael.fr/drb#")
18
+
19
+
20
@dataclass
class ChunkDescriptor:
    """Declaration of one drb:chunk as read from a topic graph."""

    # Chunk name (drb:chunkName); used as the key in retrieve_chunks().
    name: str
    # Extractor locating the data source (drb:source).
    source: Extractor
    # Dimension names (drb:dims); empty tuple when not declared.
    dims: Tuple[str, ...]
    # Data type string (drb:dtype); the loader stores None when absent.
    dtype: str
    # Tiling scheme built from the chunk's tiling properties.
    scheme: TilingScheme
    # Optional descriptor-level default selection (drb:selection).
    selection: Optional[Selection] = None
    # Optional reader hint (drb:reader).
    reader: Optional[str] = None
    # Optional collection name (drb:collection).
    collection: Optional[str] = None
    # Free-form auxiliary data; a fresh dict per instance.
    aux_data: dict = field(default_factory=dict)
31
+
32
+
33
def _rdf_list(graph: rdflib.Graph, node) -> list:
    """Return the members of the RDF collection rooted at ``node`` as a
    list of strings."""
    members = Collection(graph, node)
    return list(map(str, members))
35
+
36
+
37
def _scheme_from_chunk(graph: rdflib.Graph, chunk) -> TilingScheme:
    """Build the tiling scheme declared on one drb:chunk node.

    Only the "regular" scheme is supported (it is also the default when
    drb:tilingScheme is absent). The chunk shape comes from drb:chunkShape
    when present, otherwise from drb:tileHeight and drb:tileWidth.

    :raises DrbChunkError: on an unsupported scheme name or when no shape
        information is declared.
    """
    declared = graph.value(chunk, DRB.tilingScheme)
    scheme_name = "regular" if declared is None else str(declared)
    if scheme_name != "regular":
        raise DrbChunkError(
            f"Unsupported tilingScheme '{scheme_name}'. v1 supports 'regular'."
        )

    shape_node = graph.value(chunk, DRB.chunkShape)
    if shape_node is not None:
        extents = tuple(int(v) for v in _rdf_list(graph, shape_node))
        return RegularGrid(chunk_shape=extents)

    tile_height = graph.value(chunk, DRB.tileHeight)
    tile_width = graph.value(chunk, DRB.tileWidth)
    if tile_height is None or tile_width is None:
        raise DrbChunkError(
            "RegularGrid needs drb:chunkShape or drb:tileWidth+tileHeight."
        )
    # dims are (y, x)
    return RegularGrid(chunk_shape=(int(tile_height), int(tile_width)))
56
+
57
+
58
+ # Typed extractor predicates on a drb:source blank node, mapped to the
59
+ # drb.extractor parse_extractor factory keys.
60
+ _SOURCE_KEYS = (
61
+ ("xquery", DRB.xquery),
62
+ ("python", DRB.python),
63
+ ("script", DRB.script),
64
+ ("constant", DRB.constant),
65
+ )
66
+
67
+
68
def _source_extractor(graph: rdflib.Graph, chunk) -> Extractor:
    """Build the source Extractor for one drb:chunk.

    A literal drb:source becomes a ConstantExtractor over its string
    value. Any other node is inspected for the predicates listed in
    ``_SOURCE_KEYS`` (in that order); the first one found is handed to
    ``parse_extractor`` as ``{key: value}``.

    :raises DrbChunkError: when drb:source is missing or carries none of
        the supported predicates.
    """
    source = graph.value(chunk, DRB.source)
    if source is None:
        raise DrbChunkError("drb:chunk: missing drb:source.")
    if isinstance(source, rdflib.Literal):
        return ConstantExtractor(str(source))

    # Lazily probe each predicate; stop at the first one that is set.
    probes = ((key, graph.value(source, predicate))
              for key, predicate in _SOURCE_KEYS)
    match = next(((key, obj) for key, obj in probes if obj is not None),
                 None)
    if match is None:
        raise DrbChunkError(
            "drb:source blank node must carry one of drb:xquery, drb:python, "
            "drb:script or drb:constant."
        )
    key, obj = match
    return parse_extractor({key: str(obj)})
90
+
91
+
92
def _load_chunks_from_graph(uri: str,
                            graph: rdflib.Graph) -> List[ChunkDescriptor]:
    """Read every drb:chunk attached to the subject ``uri`` in ``graph``.

    :param uri: URI of the topic whose chunks are loaded.
    :param graph: RDF graph holding the topic description.
    :returns: one ChunkDescriptor per drb:chunk object, in graph order.
    :raises DrbChunkError: when a chunk has no drb:chunkName, or when its
        source or tiling scheme cannot be built.
    """
    subject = rdflib.URIRef(uri)
    descriptors: List[ChunkDescriptor] = []
    for chunk in graph.objects(subject, DRB.chunk):
        name = graph.value(chunk, DRB.chunkName)
        if name is None:
            # str(None) would register the chunk under the literal name
            # "None" and let unnamed chunks silently overwrite each other.
            raise DrbChunkError("drb:chunk: missing drb:chunkName.")
        dims_node = graph.value(chunk, DRB.dims)
        dtype = graph.value(chunk, DRB.dtype)
        reader = graph.value(chunk, DRB.reader)
        collection_node = graph.value(chunk, DRB.collection)
        sel_node = graph.value(chunk, DRB.selection)
        descriptors.append(ChunkDescriptor(
            name=str(name),
            source=_source_extractor(graph, chunk),
            dims=tuple(_rdf_list(graph, dims_node)) if dims_node else (),
            dtype=str(dtype) if dtype is not None else None,
            scheme=_scheme_from_chunk(graph, chunk),
            selection=_selection_from_node(graph, sel_node),
            reader=str(reader) if reader is not None else None,
            collection=(str(collection_node)
                        if collection_node is not None
                        else None),
        ))
    return descriptors
116
+
117
+
118
def _selection_from_node(graph: rdflib.Graph, node) -> Optional[Selection]:
    """Parse the drb:selection node of a chunk.

    Returns None when ``node`` is None or carries no drb:band; otherwise
    a selection over that single band index.
    """
    if node is None:
        return None
    band = graph.value(node, DRB.band)
    return None if band is None else parse_selection({"band": [int(band)]})
125
+
126
+
127
def retrieve_chunks(topic: DrbTopic) -> Dict[str, ChunkDescriptor]:
    """Collect the chunk descriptors of ``topic`` keyed by chunk name.

    Chunks of the parent topics (subClassOf) are gathered first, then the
    topic's own chunks are added, replacing inherited ones of the same
    name. The topic's graph is taken from its DAO; a DrbException while
    fetching the DAO, or a DAO without a ``graph``, contributes nothing.
    """
    collected: Dict[str, ChunkDescriptor] = {}

    parents = topic.subClassOf
    if parents is not None:
        for parent_id in parents:
            parent_topic = ManagerDao().get_drb_topic(parent_id)
            collected.update(retrieve_chunks(parent_topic))

    if topic.uri is None:
        return collected

    try:
        dao = ManagerDao().get_dao(topic.id)
    except DrbException:
        dao = None
    graph = getattr(dao, "graph", None)
    if graph is None:
        return collected

    # Own declarations override inherited ones of the same name.
    for descriptor in _load_chunks_from_graph(topic.uri, graph):
        collected[descriptor.name] = descriptor
    return collected
@@ -0,0 +1,9 @@
1
+ from drb.exceptions.core import DrbException
2
+
3
+
4
class DrbChunkError(DrbException):
    """Base error for the drb-chunk add-on.

    Every error raised by this package derives from it, so callers can
    catch this one type.
    """
6
+
7
+
8
class DrbSelectionError(DrbChunkError):
    """Selection failure: raised on an unknown selection type or an
    out-of-bounds region."""
drb/chunk/interop.py ADDED
@@ -0,0 +1,29 @@
1
+ from drb.chunk.chunk import Chunk
2
+ from drb.chunk.exceptions import DrbChunkError
3
+
4
+
5
def to_kerchunk(chunk: "Chunk") -> dict:
    """Export the chunk manifest as a kerchunk reference-spec v1 dict.

    Each manifest key becomes a dot-joined string mapped to
    ``[source path, offset, length]``. kerchunk addresses data by byte
    range, so every ChunkRef must carry a ``byte_range`` and its source
    must expose a ``path``.

    :raises DrbChunkError: when a ref has no byte range (window-only,
        format-native chunk) or its source has no ``path``.
    """
    manifest = chunk.manifest
    references = {}
    for key in manifest.keys():
        entry = manifest.ref(key)
        byte_range = entry.byte_range
        if byte_range is None:
            raise DrbChunkError(
                "to_kerchunk requires byte-range chunks; this chunk uses "
                "format-native windows. Export is not supported."
            )
        offset, length = byte_range
        location = getattr(entry.source, "path", None)
        if location is None:
            raise DrbChunkError(
                f"Chunk source for key {key} has no 'path'; cannot export a "
                f"kerchunk reference."
            )
        references[".".join(map(str, key))] = [str(location), offset, length]
    return {"version": 1, "refs": references}
drb/chunk/model.py ADDED
@@ -0,0 +1,88 @@
1
+ import abc
2
+ from dataclasses import dataclass
3
+ from typing import Iterable, Optional, Tuple
4
+
5
+ from drb.core.node import DrbNode
6
+
7
+ from drb.chunk.exceptions import DrbChunkError
8
+ from drb.chunk.selection import ResolvedSelection
9
+ from drb.chunk.tiling import RegularGrid, TilingScheme
10
+
11
+ Window = Tuple[Tuple[int, int], ...]
12
+ """per-dim (start, stop) — geometry-agnostic"""
13
+
14
+
15
@dataclass
class ChunkArray:
    """Description of a chunked array: dimensions, shape, dtype and the
    tiling scheme that splits it into tiles."""

    # Dimension names.
    dims: Tuple[str, ...]
    # Full array extent, one entry per tiled dimension.
    shape: Tuple[int, ...]
    # Data type string.
    dtype: str
    # Scheme mapping the array to tile keys.
    scheme: TilingScheme
    # Optional coordinates / attributes; not interpreted in this module.
    coords: Optional[dict] = None
    attrs: Optional[dict] = None

    @property
    def grid_shape(self) -> Tuple[int, ...]:
        """Grid shape computed by the scheme.

        :raises DrbChunkError: when the scheme is not a RegularGrid.
        """
        scheme = self.scheme
        if not isinstance(scheme, RegularGrid):
            raise DrbChunkError(
                f"grid_shape is only defined for RegularGrid, not "
                f"{type(self.scheme).__name__}."
            )
        return scheme.grid_shape(self)
32
+
33
+
34
@dataclass
class ChunkRef:
    """Address of one tile inside a source node."""

    # Tile key in the tiling grid.
    key: Tuple[int, ...]
    # Node the tile is read from.
    source: DrbNode
    # (offset, length) of the tile in the source, when byte-addressable.
    byte_range: Optional[Tuple[int, int]] = None
    # Per-dimension (start, stop) window, when window-addressable.
    window: Optional[Window] = None
    # Optional codec description; not interpreted in this module.
    codec: Optional[dict] = None
41
+
42
+
43
class ChunkManifest(abc.ABC):
    """Lazy mapping from tile key to ChunkRef over the scheme's keys."""

    @abc.abstractmethod
    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the ChunkRef addressed by ``key``."""

    @abc.abstractmethod
    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return the tile keys covered by this manifest."""

    def subset(self, resolved: ResolvedSelection) -> "ChunkManifest":
        """Return a manifest restricted to the keys of ``resolved``.

        The keys are copied into a list; refs are still served by this
        manifest.
        """
        selected_keys = list(resolved.keys)
        return _SubsetManifest(self, selected_keys)
56
+
57
+
58
class RegularGridManifest(ChunkManifest):
    """Manifest over a RegularGrid; windows derived from key * chunk_shape."""

    def __init__(self, array: ChunkArray, source: DrbNode):
        """Bind the manifest to ``array`` and the node tiles are read from.

        :raises DrbChunkError: when ``array.scheme`` is not a RegularGrid.
        """
        if not isinstance(array.scheme, RegularGrid):
            raise DrbChunkError("RegularGridManifest requires a RegularGrid.")
        self._array = array
        self._source = source
        self._chunk_shape = array.scheme.chunk_shape

    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Return the window-addressed ChunkRef for ``key``.

        Along each axis the window is ``(k * c, min((k + 1) * c, s))``
        for key index ``k``, chunk extent ``c`` and array extent ``s``, so
        edge tiles are clipped to the array.

        :raises DrbChunkError: when an index is negative or starts at or
            beyond the array extent (which would otherwise yield an
            inverted window).
        """
        window = []
        axes = zip(key, self._chunk_shape, self._array.shape)
        for axis, (index, extent, size) in enumerate(axes):
            start = index * extent
            # Index 0 is always accepted; it starts at the array origin.
            if index < 0 or (index > 0 and start >= size):
                raise DrbChunkError(
                    f"Tile key {key} is outside the grid: index {index} "
                    f"on axis {axis} starts at {start}, array extent is "
                    f"{size}.")
            window.append((start, min(start + extent, size)))
        return ChunkRef(key=key, source=self._source, window=tuple(window))

    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return every tile key the scheme defines for the array."""
        return self._array.scheme.keys(self._array)
77
+
78
+
79
class _SubsetManifest(ChunkManifest):
    """View of a parent manifest limited to an explicit list of keys."""

    def __init__(self, parent: ChunkManifest, keys):
        self._keys = keys
        self._parent = parent

    def ref(self, key: Tuple[int, ...]) -> ChunkRef:
        """Delegate to the parent manifest.

        NOTE(review): ``key`` is not checked against the subset's keys.
        """
        parent = self._parent
        return parent.ref(key)

    def keys(self) -> Iterable[Tuple[int, ...]]:
        """Return a fresh iterator over the subset's keys."""
        subset_keys = self._keys
        return iter(subset_keys)
drb/chunk/readers.py ADDED
@@ -0,0 +1,70 @@
1
+ # drb/chunk/readers.py
2
+ import abc
3
+ import io
4
+ from typing import Any, Optional
5
+
6
+ from drb.chunk.exceptions import DrbChunkError
7
+ from drb.chunk.model import ChunkArray, ChunkRef
8
+ from drb.chunk.selection import ResolvedSelection
9
+
10
+
11
class ReaderStrategy(abc.ABC):
    """Strategy that materialises one chunk through the source node's
    existing ``get_impl``."""

    @abc.abstractmethod
    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """Tell whether this strategy is able to read ``ref``."""

    @abc.abstractmethod
    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Read and return the data addressed by ``ref``."""
21
+
22
+
23
class RasterWindowReader(ReaderStrategy):
    """Windowed rasterio read through the source's EXISTING get_impl; the
    driver (impl/image) is left untouched."""

    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """A ref is readable when it carries a window."""
        has_window = ref.window is not None
        return has_window

    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Read ``ref.window`` from the source's rasterio dataset.

        ``resolved`` is accepted for interface compatibility and is not
        used by this reader.
        """
        from rasterio.io import DatasetReader

        dataset = ref.source.get_impl(DatasetReader)
        # Pass window as tuple-of-ranges; rasterio normalises it internally.
        window = ref.window
        return dataset.read(window=window)
35
+
36
+
37
class ByteRangeReader(ReaderStrategy):
    """Reads ``ref.byte_range`` from the source node's byte stream."""

    def can_read(self, ref: ChunkRef, array: ChunkArray) -> bool:
        """A ref is readable when it carries a byte range."""
        has_range = ref.byte_range is not None
        return has_range

    def read(self, ref: ChunkRef, resolved: ResolvedSelection) -> Any:
        """Seek to the range offset and read ``length`` bytes.

        ``resolved`` is accepted for interface compatibility and is not
        used by this reader.
        NOTE(review): the stream obtained from get_impl is not closed
        here; confirm who owns its lifetime.
        """
        start, size = ref.byte_range
        stream = ref.source.get_impl(io.BytesIO)
        stream.seek(start)
        payload = stream.read(size)
        return payload
48
+
49
+
50
+ _STRATEGIES = (RasterWindowReader(), ByteRangeReader())
51
+ _BY_NAME = {"raster": RasterWindowReader, "range": ByteRangeReader}
52
+
53
+
54
def select_reader(ref: ChunkRef, array: ChunkArray,
                  hint: Optional[str] = None) -> ReaderStrategy:
    """Pick the reader strategy for one chunk reference.

    :param ref: chunk reference to read.
    :param array: array the reference belongs to.
    :param hint: optional reader name ("raster" or "range"); when given,
        that reader is used instead of auto-detection.
    :returns: a strategy whose ``can_read(ref, array)`` is true.
    :raises DrbChunkError: on an unknown hint, when the hinted reader
        cannot read ``ref``, or when no strategy applies.
    """
    if hint is not None:
        try:
            reader_cls = _BY_NAME[hint]
        except KeyError as exc:
            raise DrbChunkError(
                f"Unknown reader hint '{hint}'. "
                f"Available: {', '.join(sorted(_BY_NAME))}."
            ) from exc
        reader = reader_cls()
        # A hint must not bypass the capability check: a window-less
        # "raster" read would silently become a full read, and a "range"
        # read without byte_range would fail with a bare TypeError.
        if not reader.can_read(ref, array):
            raise DrbChunkError(
                f"Reader '{hint}' cannot read this chunk (missing window "
                f"or byte_range). Refusing to fall back to a full read."
            )
        return reader
    for strategy in _STRATEGIES:
        if strategy.can_read(ref, array):
            return strategy
    raise DrbChunkError(
        "No reader strategy applies to this chunk (no window, no byte_range). "
        "Refusing to fall back to a full read."
    )