seaotter 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seaotter-0.0.1/LICENSE ADDED
@@ -0,0 +1 @@
1
+ Contact Dan Jacobellis
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: seaotter
3
+ Version: 0.0.1
4
+ Summary: Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
5
+ Author-email: Dan Jacobellis <danjacobellis@utexas.edu>
6
+ License-Expression: LicenseRef-Proprietary
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.6
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: torch
13
+ Dynamic: license-file
14
+
15
+ # seaotter
16
+ Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
@@ -0,0 +1,2 @@
1
+ # seaotter
2
+ Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
@@ -0,0 +1,22 @@
1
+ [build-system]
2
+ requires = ["setuptools>=42", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "seaotter"
7
+ version = "0.0.1"
8
+ description = "Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval"
9
+ authors = [
10
+ {name = "Dan Jacobellis", email = "danjacobellis@utexas.edu"}
11
+ ]
12
+ readme = "README.md"
13
+ license = "LicenseRef-Proprietary"
14
+ license-files = ["LICENSE"]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "Operating System :: OS Independent"
18
+ ]
19
+ dependencies = [
20
+ "torch", "torchvision", "pillow", "pillow-jpls",
21
+ ]
22
+ requires-python = ">=3.6"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .transcode import encode, transcode
2
+
3
+ __all__ = ["encode", "transcode"]
@@ -0,0 +1,117 @@
1
+ """One-time transcode between the SEA OTTER compressed latent format and
2
+ standard image formats.
3
+
4
+ Sensor side (embedded, low power): run the lightweight encoder, quantize to
5
+ int8, and save each latent channel as a separate lossless JPEG-LS file.
6
+
7
+ Cloud side (GPU available): load the per-channel JPEG-LS files, regroup them
8
+ into the model's multi-scale latent structure, and run the heavy DNN decoder
9
+ once to produce a reconstruction that can then be re-saved in any standard
10
+ image format (PNG, JPEG, AVIF, ...).
11
+
12
+ The `encode` and `transcode` functions here are deliberately decoupled from a
13
+ specific autoencoder class. They rely only on the duck-typed interface:
14
+ model.encode(x) -> list[Tensor(B, C_group, H_s, W_s)]
15
+ model.decode(latents) -> Tensor(B, C, H, W)
16
+ model.scale_groups -> list[tuple[ps, start, end]]
17
+ """
18
+
19
+ import io
20
+
21
+ import PIL.Image
22
+ import pillow_jpls # noqa: F401 registers the JPEG-LS codec with Pillow
23
+ import torch
24
+ from torchvision.transforms.v2.functional import pil_to_tensor, to_pil_image
25
+
26
+
27
+ def _channel_to_jpegls(plane_int8: torch.Tensor) -> bytes:
28
+ """Encode a single int8 latent plane (H, W) as a JPEG-LS byte string.
29
+
30
+ int8 values in [-127, 127] are shifted by +127 into uint8 [0, 254] for
31
+ JPEG-LS, which stores unsigned samples.
32
+ """
33
+ assert plane_int8.dim() == 2, f"expected 2D plane, got {tuple(plane_int8.shape)}"
34
+ uint8 = (plane_int8.to(torch.long) + 127).to(torch.uint8).cpu()
35
+ buff = io.BytesIO()
36
+ to_pil_image(uint8).save(buff, format="JPEG-LS")
37
+ return buff.getvalue()
38
+
39
+
40
def _jpegls_to_channel(data: bytes) -> torch.Tensor:
    """Turn one JPEG-LS byte string back into a signed int8 latent plane.

    The image is decoded through Pillow, its first channel is taken, and the
    stored samples are shifted down by 127 before being cast to int8.
    """
    stream = io.BytesIO(data)
    decoded = pil_to_tensor(PIL.Image.open(stream))  # expected (1, H, W) uint8
    first_channel = decoded[0]
    # Subtract in int64 so the offset removal happens before narrowing.
    recentred = first_channel.to(torch.long) - 127
    return recentred.to(torch.int8)
45
+
46
+
47
def encode(model, x):
    """Run the encoder on a single image and serialize every latent channel.

    Args:
        model: object exposing ``encode(x)`` that returns a list of latent
            tensors, each indexed as (batch, channel, ...). Per the module
            documentation each latent is (B, C_group, H_s, W_s), and the
            receiver's ``scale_groups`` must describe the same layout.
        x: input tensor whose leading (batch) dimension must be 1. The
            module documentation describes it as (1, C, H, W) in the model's
            input domain, on the same device as the model.

    Returns:
        list[bytes]: one JPEG-LS byte string per latent channel, in the order
        the latents are returned by ``model.encode`` and, within each latent,
        by ascending channel index.

    Raises:
        ValueError: if the batch dimension of ``x`` is not 1.
    """
    if x.shape[0] != 1:
        raise ValueError(f"encode expects batch size 1, got {x.shape[0]}")

    # Round to the nearest integer and saturate to the symmetric int8 range
    # while autograd bookkeeping is disabled.
    with torch.inference_mode():
        quantized = [
            latent.round().clamp(-127, 127).to(torch.int8)
            for latent in model.encode(x)
        ]

    files = []
    for group in quantized:
        n_channels = group.shape[1]
        files.extend(_channel_to_jpegls(group[0, ch]) for ch in range(n_channels))
    return files
74
+
75
+
76
def transcode(model, channel_files, device=None):
    """Decode per-channel JPEG-LS bytes through the DNN decoder.

    This is the one-time transcode step: run it once in the cloud after data
    leaves the sensor to obtain a reconstruction in a standard image format.
    Subsequent retrieval (human viewing, ML training) can then read the
    standard output without touching the large decoder again.

    Args:
        model: the autoencoder whose scale structure matches ``encode``. Must
            expose ``decode(latents)`` and ``scale_groups`` (list of
            ``(ps, start, end)`` tuples). ``parameters()`` is optional and is
            only consulted to infer the device.
        channel_files: sequence of bytes as produced by :func:`encode`,
            ordered by scale group, then by channel within each group.
        device: target device for the latents. Defaults to the device of the
            model's first parameter, or the CPU when the model has no
            ``parameters()`` method or no parameters.

    Returns:
        The output of ``model.decode`` clamped to [-1, 1]; documented by the
        module as a (1, C, H, W) image tensor. Convert to [0, 1] and re-save
        as PNG/JPEG/... for downstream use.

    Raises:
        ValueError: if the number of channel files does not equal the total
            channel count implied by ``model.scale_groups``.
    """
    if device is None:
        # A model implementing only the duck-typed interface may have no
        # ``parameters()`` method, and a parameterless module yields an empty
        # iterator; both cases fall back to the CPU instead of raising.
        get_params = getattr(model, "parameters", None)
        first_param = next(iter(get_params()), None) if callable(get_params) else None
        device = first_param.device if first_param is not None else torch.device("cpu")

    # Materialize once so validation and regrouping see the same groups.
    scale_groups = list(model.scale_groups)

    expected = sum(end - start for _, start, end in scale_groups)
    if len(channel_files) != expected:
        raise ValueError(
            f"expected {expected} channel files for model.scale_groups="
            f"{scale_groups}, got {len(channel_files)}"
        )

    latents = []
    idx = 0
    for _ps, start, end in scale_groups:
        n_ch = end - start
        planes = [_jpegls_to_channel(blob) for blob in channel_files[idx:idx + n_ch]]
        idx += n_ch
        # NOTE(review): the planes are handed to the decoder without a dtype
        # cast (they look to be int8) — confirm model.decode accepts that.
        z = torch.stack(planes, dim=0).unsqueeze(0).to(device)  # (1, C_group, H, W)
        latents.append(z)

    with torch.inference_mode():
        xhat = model.decode(latents).clamp(-1, 1)
    return xhat
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.4
2
+ Name: seaotter
3
+ Version: 0.0.1
4
+ Summary: Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
5
+ Author-email: Dan Jacobellis <danjacobellis@utexas.edu>
6
+ License-Expression: LicenseRef-Proprietary
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ Requires-Python: >=3.6
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: torch
13
+ Dynamic: license-file
14
+
15
+ # seaotter
16
+ Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/seaotter/__init__.py
5
+ src/seaotter/transcode.py
6
+ src/seaotter.egg-info/PKG-INFO
7
+ src/seaotter.egg-info/SOURCES.txt
8
+ src/seaotter.egg-info/dependency_links.txt
9
+ src/seaotter.egg-info/requires.txt
10
+ src/seaotter.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ torch
@@ -0,0 +1 @@
1
+ seaotter