kfbslide 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kfbslide/__init__.py ADDED
@@ -0,0 +1,60 @@
1
+ """
2
+ KFBSlide — Pure Python KFB whole-slide image reader.
3
+
4
+ A minimal, cross-platform reader for KFB (KFBio) whole-slide images
5
+ with an OpenSlide-compatible API.
6
+
7
+ Author: Yifan Feng <evanfeng97@gmail.com>
8
+ License: MIT
9
+
10
+ Drop-in replacement usage:
11
+ import kfbslide as openslide
12
+ slide = openslide.OpenSlide("sample.kfb")
13
+ """
14
+
15
+ from ._slide import OpenSlide, KfbSlide, open_slide
16
+ from ._exceptions import (
17
+ OpenSlideError,
18
+ OpenSlideUnsupportedFormatError,
19
+ KfbError,
20
+ KfbUnsupportedFormatError,
21
+ KfbOpenError,
22
+ )
23
+
24
+ __version__ = "0.2.0"
25
+
26
+ # Standard OpenSlide property name constants
27
+ PROPERTY_NAME_VENDOR = "openslide.vendor"
28
+ PROPERTY_NAME_QUICKHASH1 = "openslide.quickhash-1"
29
+ PROPERTY_NAME_BACKGROUND_COLOR = "openslide.background-color"
30
+ PROPERTY_NAME_OBJECTIVE_POWER = "openslide.objective-power"
31
+ PROPERTY_NAME_MPP_X = "openslide.mpp-x"
32
+ PROPERTY_NAME_MPP_Y = "openslide.mpp-y"
33
+ PROPERTY_NAME_BOUNDS_X = "openslide.bounds-x"
34
+ PROPERTY_NAME_BOUNDS_Y = "openslide.bounds-y"
35
+ PROPERTY_NAME_BOUNDS_WIDTH = "openslide.bounds-width"
36
+ PROPERTY_NAME_BOUNDS_HEIGHT = "openslide.bounds-height"
37
+
38
+ __all__ = [
39
+ # Primary OpenSlide API
40
+ "OpenSlide",
41
+ "OpenSlideError",
42
+ "OpenSlideUnsupportedFormatError",
43
+ # Property constants
44
+ "PROPERTY_NAME_VENDOR",
45
+ "PROPERTY_NAME_QUICKHASH1",
46
+ "PROPERTY_NAME_BACKGROUND_COLOR",
47
+ "PROPERTY_NAME_OBJECTIVE_POWER",
48
+ "PROPERTY_NAME_MPP_X",
49
+ "PROPERTY_NAME_MPP_Y",
50
+ "PROPERTY_NAME_BOUNDS_X",
51
+ "PROPERTY_NAME_BOUNDS_Y",
52
+ "PROPERTY_NAME_BOUNDS_WIDTH",
53
+ "PROPERTY_NAME_BOUNDS_HEIGHT",
54
+ # Backward compatibility
55
+ "KfbSlide",
56
+ "open_slide",
57
+ "KfbError",
58
+ "KfbUnsupportedFormatError",
59
+ "KfbOpenError",
60
+ ]
kfbslide/_cache.py ADDED
@@ -0,0 +1,40 @@
1
+ """LRU cache for decoded KFB tiles.
2
+
3
+ Author: Yifan Feng <evanfeng97@gmail.com>
4
+ """
5
+
6
+ from collections import OrderedDict
7
+ from typing import Optional
8
+
9
+ from PIL import Image
10
+
11
+
12
class _LRUCache:
    """Bounded least-recently-used cache for decoded tiles.

    Entries are kept in an ``OrderedDict`` ordered from least to most recently
    used, so lookup, insertion and eviction are all O(1).
    """

    __slots__ = ("capacity", "_cache")

    def __init__(self, capacity: int):
        # A negative capacity is clamped to zero, which disables caching.
        self.capacity = max(0, capacity)
        self._cache: OrderedDict[int, Image.Image] = OrderedDict()

    def get(self, key: int) -> Optional[Image.Image]:
        """Return the entry stored under *key* and mark it most recently used.

        Returns ``None`` when *key* is not cached.
        """
        try:
            self._cache.move_to_end(key)
        except KeyError:
            return None
        return self._cache[key]

    def put(self, key: int, value: Image.Image) -> None:
        """Store *value* under *key*, evicting the oldest entry when full.

        Does nothing when the capacity is zero.
        """
        if self.capacity <= 0:
            return
        entries = self._cache
        if key in entries:
            # Refresh recency; the value itself is overwritten below.
            entries.move_to_end(key)
        elif len(entries) >= self.capacity:
            # The front of the OrderedDict is the least recently used entry.
            entries.popitem(last=False)
        entries[key] = value

    def clear(self) -> None:
        """Drop every cached entry."""
        self._cache.clear()

    def __len__(self) -> int:
        """Return the number of cached entries."""
        return len(self._cache)
@@ -0,0 +1,22 @@
1
+ """KFBSlide / OpenSlide-compatible exceptions.
2
+
3
+ Author: Yifan Feng <evanfeng97@gmail.com>
4
+ """
5
+
6
+
7
class OpenSlideError(Exception):
    """Base exception for OpenSlide errors."""


class OpenSlideUnsupportedFormatError(OpenSlideError):
    """File format not supported or file is corrupted."""


# Backward compatibility aliases: each legacy name is bound to the very same
# class object as its OpenSlide counterpart, so ``except`` clauses written
# against either name catch the same exceptions.
KfbError = KfbOpenError = OpenSlideError
KfbUnsupportedFormatError = OpenSlideUnsupportedFormatError
kfbslide/_kfbformat.py ADDED
@@ -0,0 +1,244 @@
1
+ """
2
+ KFB 文件格式解析器 —— 纯 Python 实现
3
+
4
+ 基于对 KFB 文件二进制结构的分析:
5
+ - Section 0x01: 文件基本信息(版本、尺寸、瓦片数等)
6
+ - Section 0x02: 关联图像(macro/label/thumbnail)信息
7
+ - 关联图像数据: JPEG 格式,紧跟在 section 0x02 之后
8
+ - 瓦片数据: JPEG 格式,通过瓦片索引表定位
9
+
10
+ Author: Yifan Feng <evanfeng97@gmail.com>
11
+ """
12
+
13
+ import struct
14
+ from dataclasses import dataclass
15
+ from typing import List, Optional, Dict
16
+
17
+
18
@dataclass
class KfbSection:
    """KFB section, framed as ``f? XX ee ee ... ff XX ee ee``.

    Attributes:
        sec_type: Section type value.
        marker: Section marker value.
        offset: Absolute offset of the section in the file.
        footer_pos: Absolute offset of the section footer in the file.
        payload: Raw payload bytes of the section.
    """

    sec_type: int
    marker: int
    offset: int
    footer_pos: int
    payload: bytes
27
+
28
+
29
@dataclass
class KfbHeader:
    """KFB file header information (section 0x01).

    Attributes:
        magic: Magic string, "KFB".
        version: Format version number (e.g. 1.6).
        tile_count: Total number of tiles.
        height: Height of the base image.
        width: Width of the base image.
        scan_scale: Scan magnification (e.g. 40).
        format: Compression format (e.g. "JPEG").
        spend_time: Time the scan took.
        scan_time: Scan timestamp.
        tile_size: Tile edge length (e.g. 256).
        section_0x02_offset: File offset of section 0x02 (macro).
        section_0x03_offset: File offset of section 0x03 (label).
        tile_index_start: Offset where the tile index table starts.
        tile_index_end: Offset where the tile index table ends.
        _raw_field_0x3c: Raw header field whose meaning is unknown.
        mpp: Microns per pixel.
    """

    magic: str
    version: float
    tile_count: int
    height: int
    width: int
    scan_scale: int
    format: str
    spend_time: int
    scan_time: int
    tile_size: int
    # Fields below are derived while parsing the header payload.
    section_0x02_offset: int
    section_0x03_offset: int
    # Extent of the tile index table.
    tile_index_start: int
    tile_index_end: int
    # Kept verbatim because the field has not been identified.
    _raw_field_0x3c: int
    mpp: float
52
+
53
+
54
@dataclass
class KfbAssocImage:
    """Description of one associated image.

    Attributes:
        name: Name the image is exposed under.
        width: Image width.
        height: Image height.
        data_length: Length of the image data.
        data_offset: Absolute offset of the image data in the file.
    """

    name: str
    width: int
    height: int
    data_length: int
    data_offset: int
63
+
64
+
65
@dataclass
class KfbFileInfo:
    """Complete information parsed from a KFB file.

    Attributes:
        header: Parsed file header.
        assoc_images: Descriptions of the associated images.
        tile_index_offset: Offset recorded for the tile index.
        tile_data_offset: Offset recorded for the tile data.
    """

    header: KfbHeader
    assoc_images: List[KfbAssocImage]
    tile_index_offset: int
    tile_data_offset: int
73
+
74
+
75
def _read_section(data: bytes, offset: int) -> Optional[KfbSection]:
    """Read one section framed as ``f? XX ee ee ... ff XX ee ee``.

    Args:
        data: Buffer holding the bytes to parse (``bytes`` or ``bytearray``).
        offset: Position in *data* where the 4-byte section header starts.

    Returns:
        The parsed section, or ``None`` when fewer than four bytes are
        available at *offset*, the header does not end in ``ee ee``, or no
        matching footer starts less than 100000 bytes after *offset*.
    """
    if offset + 4 > len(data):
        return None

    header = data[offset : offset + 4]
    if header[2:4] != b"\xee\xee":
        return None

    sec_marker, sec_type = header[0], header[1]

    # The footer repeats the section type: ff XX ee ee.
    footer = bytes((0xFF, sec_type, 0xEE, 0xEE))

    # A footer may START anywhere in [offset + 4, offset + 100000). find()
    # bounds where a match may END, hence the 3 extra bytes. Clamping to
    # len(data) also accepts a footer that occupies the very last four bytes
    # of the buffer, which a scan stopping at len(data) - 4 would skip.
    search_end = min(offset + 100000 + 3, len(data))
    footer_pos = data.find(footer, offset + 4, search_end)
    if footer_pos == -1:
        return None

    return KfbSection(
        sec_type=sec_type,
        marker=sec_marker,
        offset=offset,
        footer_pos=footer_pos,
        payload=data[offset + 4 : footer_pos],
    )
105
+
106
+
107
def _parse_header(section: KfbSection) -> KfbHeader:
    """Decode the payload of section 0x01 (the file header).

    Every numeric field is read little-endian from a fixed position in the
    payload.

    Raises:
        ValueError: If the payload is shorter than 88 bytes.
    """
    raw = section.payload
    if len(raw) < 88:
        raise ValueError(f"Header section too small: {len(raw)} bytes")

    def text(start: int, stop: int) -> str:
        # Fixed-width ASCII field with trailing NUL padding removed.
        return raw[start:stop].decode("ascii", errors="replace").rstrip("\x00")

    def number(fmt: str, start: int):
        # Single little-endian value located at byte position `start`.
        return struct.unpack_from(fmt, raw, start)[0]

    return KfbHeader(
        magic=text(0, 4),
        version=number("<f", 8),
        tile_count=number("<I", 12),
        height=number("<I", 16),
        width=number("<I", 20),
        scan_scale=number("<I", 24),
        format=text(28, 32),
        spend_time=number("<I", 36),
        scan_time=number("<q", 40),
        tile_size=number("<I", 84),
        section_0x02_offset=number("<I", 48),
        section_0x03_offset=number("<I", 52),
        tile_index_end=number("<I", 56),
        _raw_field_0x3c=number("<I", 60),
        tile_index_start=number("<I", 64),
        mpp=number("<f", 72),
    )
131
+
132
+
133
def _parse_image_section(data: bytes, section_offset: int, name: str) -> KfbAssocImage:
    """Parse one image description section (0x02 or 0x03).

    Args:
        data: Buffer holding the file bytes.
        section_offset: Position in *data* where the section header starts.
        name: Name to give the resulting associated image.

    Returns:
        The image description; its ``data_offset`` is the section offset plus
        the relative offset stored in the section.
    """
    body_start = section_offset + 4
    body = data[body_start : body_start + 44]

    # Body layout (little-endian): 4 skipped bytes, height, width,
    # 4 skipped bytes, data length, offset relative to the section start.
    height, width, data_length, rel_offset = struct.unpack_from("<4xII4xII", body)

    return KfbAssocImage(
        name=name,
        width=width,
        height=height,
        data_length=data_length,
        data_offset=section_offset + rel_offset,
    )
148
+
149
+
150
def _parse_assoc_images(sec2: KfbSection, header: KfbHeader, data: bytes) -> List[KfbAssocImage]:
    """
    Parse the descriptions of all associated images.

    File layout:
    - section 0x02: macro image information
    - section 0x03: label image information
    - thumbnail: generated dynamically, not stored in the file

    The macro entry is always returned first. The label entry follows only
    when the header records a positive section 0x03 offset and the two bytes
    at that offset are ``f1 03``.
    """
    macro = _parse_image_section(data, sec2.offset, "macro")

    label_at = header.section_0x03_offset
    has_label = label_at > 0 and data[label_at : label_at + 2] == b"\xf1\x03"
    if not has_label:
        return [macro]

    return [macro, _parse_image_section(data, label_at, "label")]
170
+
171
+
172
def parse_kfb_file(path: str) -> KfbFileInfo:
    """Parse a KFB file and return its complete description.

    Data is read on demand: the first 1 MiB is read up front, and more is
    read only when section 0x02 or the first JPEG marker lies beyond what has
    been buffered so far.

    Args:
        path: Path of the KFB file.

    Returns:
        Header, associated-image descriptions, and the tile index / tile data
        offsets.

    Raises:
        ValueError: If section 0x01 or section 0x02 cannot be found.
    """
    chunk_size = 1024 * 1024
    jpeg_marker = b"\xff\xd8\xff"

    with open(path, "rb") as f:
        # The first chunk is normally enough to contain the file header.
        data = bytearray(f.read(chunk_size))

        def read_until(length: int) -> None:
            # Grow the buffer to at least `length` bytes, or to end of file.
            while len(data) < length:
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                data.extend(chunk)

        # Section 0x01 must start at the very beginning of the file.
        sec1 = _read_section(data, 0)
        if not sec1 or sec1.sec_type != 0x01:
            raise ValueError("Invalid KFB file: missing section 0x01")

        header = _parse_header(sec1)

        # Section 0x02 does not necessarily follow section 0x01 directly, so
        # probe every position in a 10000-byte window after the footer.
        sec2_pos = sec1.footer_pos + 4
        read_until(sec2_pos + 10000 + 4)  # search window + room for a footer

        sec2 = None
        max_search = min(sec2_pos + 10000, len(data) - 4)
        while sec2_pos < max_search:
            sec2 = _read_section(data, sec2_pos)
            if sec2 and sec2.sec_type == 0x02:
                break
            sec2_pos += 1
        else:
            raise ValueError("Invalid KFB file: missing section 0x02")

        # Associated images.
        assoc_images = _parse_assoc_images(sec2, header, data)

        # The tile region starts right after the last associated image.
        last_assoc = assoc_images[-1]
        tile_index_offset = last_assoc.data_offset + last_assoc.data_length

        # Buffer at least 1 MiB past that point before looking for the first
        # JPEG tile.
        read_until(tile_index_offset + chunk_size)
        first_jpeg = data.find(jpeg_marker, tile_index_offset)

        # Not found yet: keep reading until a marker shows up or the file ends.
        while first_jpeg == -1:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            # Resume two bytes before the new data so a marker split across
            # the chunk boundary is still detected; searching only the new
            # chunk would miss it.
            resume = max(tile_index_offset, len(data) - (len(jpeg_marker) - 1))
            data.extend(chunk)
            first_jpeg = data.find(jpeg_marker, resume)

        # Without any JPEG marker, fall back to the start of the tile region.
        tile_data_offset = first_jpeg if first_jpeg != -1 else tile_index_offset

    return KfbFileInfo(
        header=header,
        assoc_images=assoc_images,
        tile_index_offset=tile_index_offset,
        tile_data_offset=tile_data_offset,
    )
kfbslide/_slide.py ADDED
@@ -0,0 +1,525 @@
1
+ """
2
+ KFBSlide — OpenSlide-compatible API for KFB files.
3
+
4
+ Architecture:
5
+ - File header parsing: Pure Python (cross-platform)
6
+ - Associated images: Pure Python (read JPEG directly from file)
7
+ - Tile reading: Pure Python via tile index table
8
+ - Tile cache: LRU decoded-tile cache for repeated reads
9
+ - JPEG decoding: Pillow (pure Python, no extra dependencies)
10
+
11
+ Author: Yifan Feng <evanfeng97@gmail.com>
12
+ """
13
+
14
+ import io
15
+ import struct
16
+ from collections.abc import Mapping
17
+ from typing import Dict, List, Optional, Tuple
18
+
19
+ from PIL import Image
20
+
21
+ from ._cache import _LRUCache
22
+ from ._exceptions import OpenSlideError, OpenSlideUnsupportedFormatError
23
+ from ._kfbformat import KfbAssocImage, KfbFileInfo, parse_kfb_file
24
+
25
+
26
class _KfbPropertyMap(Mapping):
    """Read-only mapping compatible with OpenSlide's property map.

    Only the read side of the mapping protocol is implemented; ``get``,
    ``keys``, ``items``, ``in`` and friends come from ``Mapping``.
    """

    __slots__ = ("_data",)

    def __init__(self, items: Dict[str, str]):
        # Take a private copy so later changes to the caller's dict do not
        # show through this mapping.
        self._data = dict(items)

    def __repr__(self) -> str:
        return f"_KfbPropertyMap({self._data!r})"

    def __len__(self) -> int:
        return len(self._data)

    def __iter__(self):
        return iter(self._data)

    def __getitem__(self, key: str) -> str:
        return self._data[key]
45
+
46
+
47
class _AssociatedImageMap(Mapping):
    """Lazy read-only mapping for associated images.

    An image is decoded the first time it is requested and kept afterwards.
    When a file_handle_getter is supplied and returns a handle, that handle
    is reused so the file is not reopened on every access; when it returns
    None (e.g. slide closed) or no getter was given, the file is opened
    independently for the read.
    """

    __slots__ = ("_filename", "_assoc_list", "_names", "_cache", "_fh_getter")

    def __init__(
        self,
        filename: str,
        assoc_images: List[KfbAssocImage],
        file_handle_getter=None,
    ):
        self._filename = filename
        self._assoc_list = assoc_images
        self._names = [a.name for a in assoc_images]
        self._cache: Dict[str, Image.Image] = {}
        self._fh_getter = file_handle_getter

    def _read_image_bytes(self, assoc: KfbAssocImage) -> bytes:
        """Return the raw bytes stored in the file for *assoc*."""
        # Prefer the slide's own file handle when one is available.
        handle = self._fh_getter() if self._fh_getter else None
        if handle is None:
            with open(self._filename, "rb") as f:
                f.seek(assoc.data_offset)
                return f.read(assoc.data_length)
        handle.seek(assoc.data_offset)
        return handle.read(assoc.data_length)

    def __getitem__(self, key: str) -> Image.Image:
        """Return the RGBA image named *key*; raise KeyError if there is none."""
        try:
            return self._cache[key]
        except KeyError:
            pass

        # First description whose name matches, if any.
        assoc = next((a for a in self._assoc_list if a.name == key), None)
        if assoc is None:
            raise KeyError(key)

        jpeg_data = self._read_image_bytes(assoc)
        img = Image.open(io.BytesIO(jpeg_data)).convert("RGBA")
        self._cache[key] = img
        return img

    def __iter__(self):
        return iter(self._names)

    def __len__(self) -> int:
        return len(self._names)

    def __repr__(self) -> str:
        return f"_AssociatedImageMap({self._names!r})"
97
+
98
+
99
class _TileIndex:
    """Parsed KFB tile index table.

    The table is read as ``tile_count`` records of 64 bytes each, starting at
    the header's ``tile_index_start``. Every record yields one entry dict
    with the keys ``scale``, ``x``, ``y``, ``width``, ``height`` and ``size``.
    """

    __slots__ = (
        "filename",
        "tile_size",
        "entries",
        "lookup",
        "offsets",
        "level_scales",
        "scale_to_level",
        "level_count",
        "_level_dimensions",
        "_level_downsamples",
    )

    def __init__(self, filename: str, info: KfbFileInfo):
        header = info.header
        self.filename = filename
        self.tile_size = header.tile_size

        tile_count = header.tile_count
        with open(filename, "rb") as f:
            f.seek(header.tile_index_start)
            table = f.read(tile_count * 64)

        # Fields used from each 64-byte record (little-endian):
        # 4 skipped bytes, x, y, width, height, scale (float),
        # 8 skipped bytes, size. Nothing past byte 36 is read.
        record = struct.Struct("<4xIIIIf8xI")

        self.entries: List[Dict] = []
        self.lookup: Dict[Tuple[float, int, int], int] = {}
        for i in range(tile_count):
            x, y, width, height, scale, size = record.unpack_from(table, i * 64)
            self.entries.append(
                {
                    "scale": scale,
                    "x": x,
                    "y": y,
                    "width": width,
                    "height": height,
                    "size": size,
                }
            )
            self.lookup[(scale, x, y)] = i

        # File offset of each tile: tiles are laid out back to back, in index
        # order, starting at tile_data_offset.
        position = info.tile_data_offset
        self.offsets: List[int] = []
        for entry in self.entries:
            self.offsets.append(position)
            position += entry["size"]

        # One level per distinct scale >= 1.0, ordered from the largest scale
        # down (e.g. 40.0, 20.0, ...).
        scales = {entry["scale"] for entry in self.entries}
        ordered = sorted((s for s in scales if s >= 1.0), reverse=True)
        self.level_scales = dict(enumerate(ordered))
        self.scale_to_level = {s: lvl for lvl, s in self.level_scales.items()}
        self.level_count = len(ordered)

        # Downsample of a level is scan_scale / level scale; the level's
        # dimensions are the base dimensions divided by it and truncated.
        base_w, base_h = header.width, header.height
        self._level_downsamples: List[float] = [
            header.scan_scale / s for s in ordered
        ]
        self._level_dimensions: List[Tuple[int, int]] = [
            (int(base_w / ds), int(base_h / ds)) for ds in self._level_downsamples
        ]

    def level_dimensions(self) -> Tuple[Tuple[int, int], ...]:
        """Return the (width, height) of every level as a tuple."""
        return tuple(self._level_dimensions)

    def level_downsamples(self) -> Tuple[float, ...]:
        """Return the downsample factor of every level as a tuple."""
        return tuple(self._level_downsamples)

    def get_best_level_for_downsample(self, downsample: float) -> int:
        """Choose the level with downsample closest to but not greater than target.

        Levels are scanned in order and the scan stops at the first level
        whose downsample exceeds *downsample*; level 0 is returned when even
        the first level exceeds it.
        """
        best = 0
        for level, ds in enumerate(self._level_downsamples):
            if not (ds <= downsample):
                break
            best = level
        return best
183
+
184
+
185
class OpenSlide:
    """
    KFB whole-slide image reader with OpenSlide-compatible API.

    Interface compatible with openslide-python, but completely independent
    of the OpenSlide native library.

    Error handling follows latching semantics: once ``read_region`` fails
    while reading or decoding, every later operation raises OpenSlideError.
    Passing an invalid ``level`` is reported without latching.
    """

    __slots__ = (
        "_filename",
        "_closed",
        "_error",
        "_tile_cache",
        "_file_handle",
        "_info",
        "_index",
        "_properties",
        "_associated_images",
    )

    def __init__(self, filename: str):
        """Open *filename*, parse its header and build the tile index.

        Raises:
            OpenSlideUnsupportedFormatError: If the file cannot be parsed.
            OpenSlideError: If the tile index cannot be built or the file
                cannot be opened for reading.
        """
        self._filename = filename
        self._closed = False
        self._error = False
        self._tile_cache = _LRUCache(256)
        self._file_handle: Optional[io.BufferedReader] = None

        # Each stage is wrapped separately so the message names the stage;
        # `from e` keeps the original exception as the explicit cause.
        try:
            self._info = parse_kfb_file(filename)
        except Exception as e:
            raise OpenSlideUnsupportedFormatError(
                f"Cannot parse KFB file: {e}"
            ) from e

        try:
            self._index = _TileIndex(filename, self._info)
        except Exception as e:
            raise OpenSlideError(f"Failed to build tile index: {e}") from e

        try:
            self._file_handle = open(filename, "rb")
        except Exception as e:
            raise OpenSlideError(f"Failed to open file handle: {e}") from e

        # Pre-build property map.
        header = self._info.header
        props = {
            "openslide.vendor": "kfbio",
            "openslide.quickhash-1": "",
            "openslide.mpp-x": str(header.mpp),
            "openslide.mpp-y": str(header.mpp),
            "openslide.objective-power": str(header.scan_scale),
            "kfbio.vendor": "Kfbio",
            "kfbio.version": str(header.version),
            "kfbio.scan_scale": str(header.scan_scale),
            "kfbio.tile_size": str(header.tile_size),
            "kfbio.tile_count": str(header.tile_count),
            "kfbio.width": str(header.width),
            "kfbio.height": str(header.height),
            "kfbio.spend_time": str(header.spend_time),
            "kfbio.scan_time": str(header.scan_time),
            "kfbio.mpp": str(header.mpp),
        }
        self._properties = _KfbPropertyMap(props)

        # Lazy associated images map (reuses file handle when possible).
        self._associated_images = _AssociatedImageMap(
            filename,
            self._info.assoc_images,
            file_handle_getter=lambda: self._file_handle
            if not self._closed
            else None,
        )

    # ------------------------------------------------------------------
    # Class methods
    # ------------------------------------------------------------------

    @classmethod
    def detect_format(cls, filename: str) -> Optional[str]:
        """Detect whether a file is a KFB format slide.

        Returns:
            "kfbio" if the file is recognized, None otherwise.
        """
        # Detection is best effort by design: any failure to parse simply
        # means "not a KFB file".
        try:
            info = parse_kfb_file(filename)
            if info.header.magic == "KFB":
                return "kfbio"
        except Exception:
            pass
        return None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _check_open(self) -> None:
        """Raise if the slide is closed."""
        if self._closed:
            raise OpenSlideError("Slide is closed")

    def _check_error(self) -> None:
        """Raise if an error has occurred (latching semantics)."""
        if self._error:
            raise OpenSlideError("OpenSlide error has occurred")

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self._filename!r})"

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        return False

    def close(self) -> None:
        """Close the slide and release resources."""
        self._closed = True
        self._tile_cache.clear()
        if self._file_handle is not None:
            # Closing is best effort: a failure here must not mask whatever
            # the caller is already handling.
            try:
                self._file_handle.close()
            except Exception:
                pass
            self._file_handle = None

    # ------------------------------------------------------------------
    # Properties
    # ------------------------------------------------------------------

    @property
    def level_count(self) -> int:
        """Number of pyramid levels."""
        self._check_open()
        self._check_error()
        return self._index.level_count

    @property
    def dimensions(self) -> Tuple[int, int]:
        """Dimensions of the slide at level 0 (highest resolution)."""
        self._check_open()
        self._check_error()
        return self._index.level_dimensions()[0]

    @property
    def level_dimensions(self) -> Tuple[Tuple[int, int], ...]:
        """Dimensions of each pyramid level."""
        self._check_open()
        self._check_error()
        return self._index.level_dimensions()

    @property
    def level_downsamples(self) -> Tuple[float, ...]:
        """Downsample factor for each level."""
        self._check_open()
        self._check_error()
        return self._index.level_downsamples()

    @property
    def properties(self) -> Mapping[str, str]:
        """Metadata properties as a read-only mapping."""
        self._check_open()
        self._check_error()
        return self._properties

    @property
    def associated_images(self) -> Mapping[str, Image.Image]:
        """Associated images described in the file, as a lazy mapping."""
        self._check_open()
        self._check_error()
        return self._associated_images

    @property
    def color_profile(self) -> Optional[object]:
        """Embedded ICC color profile; this reader always returns None."""
        self._check_open()
        self._check_error()
        return None

    # ------------------------------------------------------------------
    # Reading operations
    # ------------------------------------------------------------------

    def get_best_level_for_downsample(self, downsample: float) -> int:
        """Get the best pyramid level for a given downsample factor."""
        self._check_open()
        self._check_error()
        return self._index.get_best_level_for_downsample(downsample)

    def read_region(
        self,
        location: Tuple[int, int],
        level: int,
        size: Tuple[int, int],
    ) -> Image.Image:
        """
        Read a region from the slide.

        Args:
            location: (x, y) top-left coordinates in level 0.
            level: Pyramid level.
            size: (width, height) output size.

        Returns:
            PIL.Image.Image (RGBA). Areas not covered by any tile stay fully
            transparent.

        Raises:
            OpenSlideError: If the slide is closed, a previous read failed,
                or *level* is out of range.
        """
        self._check_open()
        self._check_error()

        # Reject a bad level before entering the latching block: a caller
        # mistake must not mark the slide as permanently failed.
        if level < 0 or level >= self._index.level_count:
            raise OpenSlideError(f"Invalid level {level}")

        try:
            scale = self._index.level_scales[level]
            ds = self._info.header.scan_scale / scale

            # Convert the level-0 location into this level's pixel grid.
            x0, y0 = int(location[0] / ds), int(location[1] / ds)
            w, h = int(size[0]), int(size[1])

            out = Image.new("RGBA", (w, h), (0, 0, 0, 0))
            tile_size = self._index.tile_size

            # Range of tile columns/rows overlapping the requested rectangle.
            tx_start = x0 // tile_size
            ty_start = y0 // tile_size
            tx_end = (x0 + w - 1) // tile_size + 1
            ty_end = (y0 + h - 1) // tile_size + 1

            if self._file_handle is None:
                raise OpenSlideError("Slide is closed")
            f = self._file_handle

            for ty in range(ty_start, ty_end):
                for tx in range(tx_start, tx_end):
                    tile_x = tx * tile_size
                    tile_y = ty * tile_size
                    idx = self._index.lookup.get((scale, tile_x, tile_y))
                    if idx is None:
                        # No tile recorded here; leave it transparent.
                        continue

                    entry = self._index.entries[idx]
                    tw, th = entry["width"], entry["height"]

                    # Try cached decoded tile first.
                    tile = self._tile_cache.get(idx)
                    if tile is None:
                        f.seek(self._index.offsets[idx])
                        jpeg = f.read(entry["size"])
                        tile = Image.open(io.BytesIO(jpeg)).convert("RGB")

                        # Tile may be smaller than tile_size at image edges.
                        if tile.size != (tw, th):
                            tile = tile.resize((tw, th))

                        self._tile_cache.put(idx, tile)

                    # Intersect the tile with the requested rectangle, in
                    # tile-local coordinates.
                    paste_x = tile_x - x0
                    paste_y = tile_y - y0
                    crop_x0 = max(0, x0 - tile_x)
                    crop_y0 = max(0, y0 - tile_y)
                    crop_x1 = min(tw, x0 + w - tile_x)
                    crop_y1 = min(th, y0 + h - tile_y)

                    if crop_x1 > crop_x0 and crop_y1 > crop_y0:
                        cropped = tile.crop((crop_x0, crop_y0, crop_x1, crop_y1))
                        out.paste(
                            cropped.convert("RGBA"),
                            (paste_x + crop_x0, paste_y + crop_y0),
                        )

            return out
        except Exception:
            # Latch the failure so later calls fail fast.
            self._error = True
            raise

    def get_thumbnail(self, size: Tuple[int, int]) -> Image.Image:
        """Get a thumbnail image no larger than *size*.

        A "thumbnail" associated image is used when the file provides one;
        otherwise the lowest-resolution level is rendered in full. In both
        cases the result is shrunk in place to fit within *size*, keeping the
        aspect ratio.

        Args:
            size: Maximum (width, height) of the thumbnail.

        Returns:
            PIL.Image.Image (RGBA).
        """
        self._check_open()
        self._check_error()

        # Mapping.get() returns None for a missing key, so there is no
        # KeyError to handle here.
        stored = self._associated_images.get("thumbnail")
        if stored is not None:
            # Work on a copy: the mapping caches its images.
            thumb = stored.copy()
        else:
            # Fallback: read from lowest resolution level.
            level = self.level_count - 1
            thumb = self.read_region((0, 0), level, self.level_dimensions[level])

        # Honour the requested size on both paths.
        thumb.thumbnail(size, Image.LANCZOS)
        return thumb

    def set_cache(self, cache) -> None:
        """
        Attach a shared cache to the slide.

        For kfbslide, this is currently a no-op since we use a private
        per-slide LRU cache. Accepts the cache argument for API compatibility.
        """
        self._check_open()
        self._check_error()
        # No-op for now. Could be extended to use a shared cache.
494
+
495
+
496
+ # Backward compatibility alias
497
+ KfbSlide = OpenSlide
498
+
499
+
500
def open_slide(filename: str, **kwargs) -> OpenSlide:
    """
    Open a KFB file.

    Args:
        filename: Path to the KFB file.
        **kwargs: Accepted for backward compatibility and otherwise ignored.
            Passing a non-None ``tile_cache_size`` emits a
            DeprecationWarning.

    Returns:
        OpenSlide instance.
    """
    legacy_cache_size = kwargs.pop("tile_cache_size", None)
    if legacy_cache_size is not None:
        import warnings

        # stacklevel=2 attributes the warning to the caller of open_slide().
        warnings.warn(
            "tile_cache_size is deprecated and ignored",
            DeprecationWarning,
            stacklevel=2,
        )
    return OpenSlide(filename)
519
+
520
+
521
+ __all__ = [
522
+ "OpenSlide",
523
+ "KfbSlide",
524
+ "open_slide",
525
+ ]
@@ -0,0 +1,246 @@
1
+ Metadata-Version: 2.4
2
+ Name: kfbslide
3
+ Version: 0.2.0
4
+ Summary: Pure Python KFB whole-slide image reader with OpenSlide-compatible API
5
+ Project-URL: Homepage, https://github.com/yifanfeng97/kfbslide
6
+ Project-URL: Documentation, https://github.com/yifanfeng97/kfbslide#readme
7
+ Project-URL: Repository, https://github.com/yifanfeng97/kfbslide
8
+ Project-URL: Issues, https://github.com/yifanfeng97/kfbslide/issues
9
+ Author-email: Yifan Feng <evanfeng97@gmail.com>
10
+ License-Expression: MIT
11
+ License-File: LICENSE
12
+ Keywords: digital-pathology,kfb,kfbio,openslide,pathology,whole-slide-image,wsi
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Scientific/Engineering :: Image Processing
22
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: pillow>=9.0.0
25
+ Provides-Extra: dev
26
+ Requires-Dist: mypy; extra == 'dev'
27
+ Requires-Dist: pytest-cov; extra == 'dev'
28
+ Requires-Dist: pytest>=7.0; extra == 'dev'
29
+ Requires-Dist: ruff; extra == 'dev'
30
+ Description-Content-Type: text/markdown
31
+
32
+ <h1 align="center">KFBSlide</h1>
33
+
34
+ <p align="center">
35
+ <strong>A pure-Python KFB (KFBio) whole-slide image reader with an OpenSlide-compatible API</strong>
36
+ </p>
37
+
38
+ <p align="center">
39
+ <a href="README.md">English</a> |
40
+ <a href="README_zh.md">简体中文</a>
41
+ </p>
42
+
43
+ <p align="center">
44
+ <a href="https://pypi.org/project/kfbslide"><img src="https://img.shields.io/pypi/v/kfbslide?color=blue" alt="PyPI"></a>
45
+ <a href="https://pypi.org/project/kfbslide"><img src="https://img.shields.io/pypi/pyversions/kfbslide" alt="Python Versions"></a>
46
+ <a href="https://github.com/yifanfeng97/kfbslide/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="License"></a>
47
+ <a href="https://pypi.org/project/kfbslide"><img src="https://img.shields.io/pypi/dm/kfbslide?color=orange" alt="Downloads"></a>
48
+ <a href="https://github.com/yifanfeng97/kfbslide"><img src="https://img.shields.io/github/stars/yifanfeng97/kfbslide?style=social" alt="Stars"></a>
49
+ </p>
50
+
51
+ <p align="center">
52
+ <a href="#-features">✨ Features</a> •
53
+ <a href="#-installation">📦 Installation</a> •
54
+ <a href="#-quick-start">🚀 Quick Start</a> •
55
+ <a href="#-api-reference">📖 API</a> •
56
+ <a href="#-performance">⚡ Performance</a>
57
+ </p>
58
+
59
+ <p align="center">
60
+ <img src="docs/banner.png" alt="KFBSlide Banner" width="900">
61
+ </p>
62
+
63
+ ---
64
+
65
+ ## ✨ Features
66
+
67
+ - 🐍 **Pure Python** — Zero native dependencies, works out of the box on Windows / macOS / Linux
68
+ - 🔄 **OpenSlide-Compatible API** — Drop-in replacement for `openslide-python`; only the import line changes
69
+ - 🔺 **Multi-Level Pyramids** — Automatically parses 40× / 20× / 10× / 5× / 2.5× / 1.25× levels inside KFB
70
+ - 🖼️ **Associated Images** — Supports the macro and label images stored in the file; thumbnails are generated on demand via `get_thumbnail()`
71
+ - ⚡ **Tile LRU Cache** — 10~20× speedup for repeated reads of the same region
72
+ - 📊 **Full Metadata** — MPP, objective power, tile size, and more
73
+
74
+ ---
75
+
76
+ ## 📦 Installation
77
+
78
+ ### Using uv (recommended)
79
+
80
+ ```bash
81
+ uv pip install kfbslide
82
+ ```
83
+
84
+ ### Using pip
85
+
86
+ ```bash
87
+ pip install kfbslide
88
+ ```
89
+
90
+ Only depends on Pillow — installs directly on any platform.
91
+
92
+ ---
93
+
94
+ ## 🚀 Quick Start
95
+
96
+ ### Drop-in replacement for OpenSlide
97
+
98
+ ```python
99
+ import kfbslide as openslide
100
+
101
+ slide = openslide.OpenSlide("path/to/sample.kfb")
102
+
103
+ print(f"Levels: {slide.level_count}")
104
+ print(f"Level 0 dimensions: {slide.dimensions}")
105
+ for i in range(slide.level_count):
106
+ print(f" Level {i}: {slide.level_dimensions[i]} "
107
+ f"downsample={slide.level_downsamples[i]}")
108
+
109
+ # Read a region (location in level-0 coordinates, returns RGBA)
110
+ img = slide.read_region((1000, 2000), 0, (256, 256))
111
+ img.save("region.png")
112
+
113
+ # Thumbnail
114
+ thumb = slide.get_thumbnail((512, 512))
115
+ thumb.save("thumbnail.png")
116
+
117
+ # Associated images
118
+ macro = slide.associated_images["macro"]
119
+ macro.save("macro.png")
120
+
121
+ # Property access
122
+ vendor = slide.properties[openslide.PROPERTY_NAME_VENDOR]
123
+ mpp_x = slide.properties[openslide.PROPERTY_NAME_MPP_X]
124
+
125
+ slide.close()
126
+ ```
127
+
128
+ ### Context manager
129
+
130
+ ```python
131
+ with openslide.OpenSlide("sample.kfb") as slide:
132
+ img = slide.read_region((0, 0), 0, (256, 256))
133
+ # Automatically closed
134
+ ```
135
+
136
+ ---
137
+
138
+ ## 📖 API Reference
139
+
140
+ ### `OpenSlide(filename)`
141
+
142
+ Open a KFB file.
143
+
144
+ ### Class methods
145
+
146
+ | Method | Description |
147
+ |--------|-------------|
148
+ | `OpenSlide.detect_format(filename)` | Detect file format, returns `"kfbio"` or `None` |
149
+
150
+ ### Properties
151
+
152
+ | Property | Type | Description |
153
+ |----------|------|-------------|
154
+ | `level_count` | `int` | Number of pyramid levels |
155
+ | `dimensions` | `Tuple[int, int]` | Level 0 dimensions `(width, height)` (highest resolution) |
156
+ | `level_dimensions` | `Tuple[Tuple[int, int], ...]` | `(width, height)` of each level |
157
+ | `level_downsamples` | `Tuple[float, ...]` | Downsample factor for each level |
158
+ | `properties` | `Mapping[str, str]` | Metadata properties (read-only mapping) |
159
+ | `associated_images` | `Mapping[str, PIL.Image]` | Associated images: macro, label, thumbnail |
160
+ | `color_profile` | `object \| None` | ICC color profile (currently returns `None`) |
161
+
162
+ ### Methods
163
+
164
+ | Method | Description |
165
+ |--------|-------------|
166
+ | `read_region(location, level, size)` | Read a region, returns **RGBA** image |
167
+ | `get_best_level_for_downsample(downsample)` | Pick the best pyramid level for a given downsample factor |
168
+ | `get_thumbnail(size)` | Generate a thumbnail |
169
+ | `set_cache(cache)` | API-compatible no-op |
170
+ | `close()` | Close and release resources |
171
+
172
+ ### Property constants
173
+
174
+ ```python
175
+ from kfbslide import (
176
+ PROPERTY_NAME_VENDOR, # "openslide.vendor"
177
+ PROPERTY_NAME_MPP_X, # "openslide.mpp-x"
178
+ PROPERTY_NAME_MPP_Y, # "openslide.mpp-y"
179
+ PROPERTY_NAME_OBJECTIVE_POWER, # "openslide.objective-power"
180
+ )
181
+ ```
182
+
183
+ ---
184
+
185
+ ## ⚡ Performance
186
+
187
+ Benchmarked on `sample.kfb` (71,748 × 56,282, 82,595 tiles):
188
+
189
+ | Operation | Time | Note |
190
+ |-----------|------|------|
191
+ | First read of 256×256 region | ~2.1 ms | Pillow backend |
192
+ | Cache-hit read | **~0.10 ms** | 22× faster |
193
+ | Scan 20 adjacent regions (first time) | ~33 ms | 1.6 ms/region |
194
+ | Scan 20 adjacent regions (cached) | **~2.2 ms** | 0.11 ms/region, 15× faster |
195
+
196
+ > Test environment: Python 3.12, Pillow, SSD.
197
+
198
+ ---
199
+
200
+ ## 🏗️ Architecture
201
+
202
+ <p align="center">
203
+ <img src="docs/fw_en.png" alt="KFBSlide Architecture" width="800">
204
+ </p>
205
+
206
+ KFBSlide is implemented entirely in pure Python, reading images by directly parsing the KFB binary format:
207
+
208
+ - **No C/C++ extensions or system dynamic libraries required**
209
+ - **No dependency on OpenSlide, libtiff, libjpeg, or other external libraries**
210
+ - **Deployable as a single pure-Python wheel, suitable for servers, containers, and embedded environments**
211
+
212
+ ---
213
+
214
+ ## 📁 Project Structure
215
+
216
+ ```
217
+ kfbslide/
218
+ ├── src/kfbslide/
219
+ │ ├── __init__.py # Package entry point, exports OpenSlide API
220
+ │ ├── _slide.py # OpenSlide main class
221
+ │ ├── _kfbformat.py # KFB binary format parser
222
+ │ ├── _cache.py # LRU tile cache
223
+ │ └── _exceptions.py # OpenSlideError / compatibility exceptions
224
+ ├── tests/ # Tests (includes sample.kfb symlink)
225
+ ├── examples/ # Example scripts
226
+ ├── docs/ # Documentation images
227
+ ├── README.md
228
+ ├── LICENSE
229
+ └── pyproject.toml
230
+ ```
231
+
232
+ ---
233
+
234
+ ## ⚠️ Known Limitations
235
+
236
+ 1. **Read-only**: Writing to KFB files is not currently supported.
237
+ 2. **KFB v1.6**: Verified on version 1.6 files. Other versions may require adaptation.
238
+ 3. **JPEG decoding**: Uses Pillow for JPEG decoding, consistent across all platforms.
239
+
240
+ ---
241
+
242
+ ## 📄 License
243
+
244
+ [MIT](LICENSE)
245
+
246
+ Copyright (c) 2026 Yifan Feng
@@ -0,0 +1,9 @@
1
+ kfbslide/__init__.py,sha256=lt47AacnbK5auoCCG72pb6Lmd4F-CGvxhLTTC0ozwX4,1693
2
+ kfbslide/_cache.py,sha256=x_ejyQ-GsLwL9C3zX3Fxxezuyhy-tDtTCECfUFapLEw,1056
3
+ kfbslide/_exceptions.py,sha256=JQ6ip_9l8UqIpbRB6AxF_oyosWFJnDHv6vMMrD284vQ,462
4
+ kfbslide/_kfbformat.py,sha256=xgvNV_pssxsijX9kHpwaZCmNYiDQGZCRgGAXeCyr0tI,7834
5
+ kfbslide/_slide.py,sha256=2NUvMkF8aVuELtAAOA4clreXnNF47EAMqmaB2IXFFxE,16998
6
+ kfbslide-0.2.0.dist-info/METADATA,sha256=teFhSVvUmD388DR9WdPlTwOtslWb9dG7cKF1x3q47Lk,7965
7
+ kfbslide-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
8
+ kfbslide-0.2.0.dist-info/licenses/LICENSE,sha256=giFRf5C_bgy4wEImlaDjXCYbghg5UFRkchQey2z_l_U,1067
9
+ kfbslide-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yifan Feng
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.