seaotter 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seaotter-0.0.1/LICENSE +1 -0
- seaotter-0.0.1/PKG-INFO +16 -0
- seaotter-0.0.1/README.md +2 -0
- seaotter-0.0.1/pyproject.toml +22 -0
- seaotter-0.0.1/setup.cfg +4 -0
- seaotter-0.0.1/src/seaotter/__init__.py +3 -0
- seaotter-0.0.1/src/seaotter/transcode.py +117 -0
- seaotter-0.0.1/src/seaotter.egg-info/PKG-INFO +16 -0
- seaotter-0.0.1/src/seaotter.egg-info/SOURCES.txt +10 -0
- seaotter-0.0.1/src/seaotter.egg-info/dependency_links.txt +1 -0
- seaotter-0.0.1/src/seaotter.egg-info/requires.txt +1 -0
- seaotter-0.0.1/src/seaotter.egg-info/top_level.txt +1 -0
seaotter-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Contact Dan Jacobellis
|
seaotter-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seaotter
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
|
|
5
|
+
Author-email: Dan Jacobellis <danjacobellis@utexas.edu>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.6
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: torch
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# seaotter
|
|
16
|
+
Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
|
seaotter-0.0.1/pyproject.toml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
[build-system]
# The SPDX `license` string and `license-files` key below follow PEP 639,
# which setuptools only understands from 77.0.0 onwards.
requires = ["setuptools>=77.0.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "seaotter"
version = "0.0.1"
description = "Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval"
authors = [
    {name = "Dan Jacobellis", email = "danjacobellis@utexas.edu"}
]
readme = "README.md"
license = "LicenseRef-Proprietary"
license-files = ["LICENSE"]
classifiers = [
    "Programming Language :: Python :: 3",
    "Operating System :: OS Independent"
]
dependencies = [
    "torch",
    # seaotter/transcode.py imports these at module level, so they must be
    # installed together with the package.
    "torchvision",
    "pillow",
    "pillow-jpls",
]
# NOTE(review): torchvision.transforms.v2 is not available on Python 3.6;
# confirm the real minimum supported version and raise this bound.
requires-python = ">=3.6"
|
seaotter-0.0.1/src/seaotter/transcode.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""One-time transcode between the SEA OTTER compressed latent format and
|
|
2
|
+
standard image formats.
|
|
3
|
+
|
|
4
|
+
Sensor side (embedded, low power): run the lightweight encoder, quantize to
|
|
5
|
+
int8, and save each latent channel as a separate lossless JPEG-LS file.
|
|
6
|
+
|
|
7
|
+
Cloud side (GPU available): load the per-channel JPEG-LS files, regroup them
|
|
8
|
+
into the model's multi-scale latent structure, and run the heavy DNN decoder
|
|
9
|
+
once to produce a reconstruction that can then be re-saved in any standard
|
|
10
|
+
image format (PNG, JPEG, AVIF, ...).
|
|
11
|
+
|
|
12
|
+
The `encode` and `transcode` functions here are deliberately decoupled from a
|
|
13
|
+
specific autoencoder class. They rely only on the duck-typed interface:
|
|
14
|
+
model.encode(x) -> list[Tensor(B, C_group, H_s, W_s)]
|
|
15
|
+
model.decode(latents) -> Tensor(B, C, H, W)
|
|
16
|
+
model.scale_groups -> list[tuple[ps, start, end]]
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import io
|
|
20
|
+
|
|
21
|
+
import PIL.Image
|
|
22
|
+
import pillow_jpls # noqa: F401 registers the JPEG-LS codec with Pillow
|
|
23
|
+
import torch
|
|
24
|
+
from torchvision.transforms.v2.functional import pil_to_tensor, to_pil_image
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _channel_to_jpegls(plane_int8: torch.Tensor) -> bytes:
    """Encode a single int8 latent plane (H, W) as a JPEG-LS byte string.

    JPEG-LS stores unsigned samples, so the signed values are shifted by
    +127 before encoding: int8 values in [-127, 127] map to uint8 [0, 254].

    Args:
        plane_int8: 2D tensor holding one quantized latent channel.

    Returns:
        The encoded JPEG-LS file contents.

    Raises:
        ValueError: if ``plane_int8`` is not 2-dimensional.
    """
    # Raise explicitly instead of using ``assert`` so the shape check still
    # runs under ``python -O``, which strips assert statements.
    if plane_int8.dim() != 2:
        raise ValueError(f"expected 2D plane, got {tuple(plane_int8.shape)}")
    # Widen to int64 before adding the offset so the shift cannot wrap in int8.
    uint8 = (plane_int8.to(torch.long) + 127).to(torch.uint8).cpu()
    buff = io.BytesIO()
    # The "JPEG-LS" format is registered with Pillow by importing pillow_jpls.
    to_pil_image(uint8).save(buff, format="JPEG-LS")
    return buff.getvalue()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _jpegls_to_channel(data: bytes) -> torch.Tensor:
    """Decode JPEG-LS bytes into a signed int8 latent plane.

    The stored unsigned samples are shifted down by 127 and cast to int8.
    Only the first channel of the decoded image is used.
    """
    stream = io.BytesIO(data)
    # Expected to be (1, H, W) uint8 for a single-channel 8-bit image.
    samples = pil_to_tensor(PIL.Image.open(stream))
    # Widen before subtracting so values below 127 do not wrap in uint8.
    widened = samples[0].to(torch.long)
    return (widened - 127).to(torch.int8)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def encode(model, x):
    """Run the codec's encoder and serialize each latent channel as JPEG-LS.

    Args:
        model: an autoencoder exposing ``encode(x) -> list[Tensor]`` where
            each latent has shape (B, C_group, H_s, W_s). The receiver-side
            :func:`transcode` uses the model's ``scale_groups``, which must
            describe the same grouping.
        x: (1, C, H, W) input tensor in the model's input domain (typically
            linear RGB in [-1, 1] for this codec), on the same device as
            the model.

    Returns:
        list[bytes]: one JPEG-LS file per latent channel, in the order the
        groups are returned by ``model.encode`` (coarsest first), then by
        channel index within each group.

    Raises:
        ValueError: if the batch dimension of ``x`` is not 1.
    """
    batch = x.shape[0]
    if batch != 1:
        raise ValueError(f"encode expects batch size 1, got {batch}")

    with torch.inference_mode():
        # Round to the nearest integer and clamp to the symmetric int8 range
        # before the cast.
        quantized = [
            latent.round().clamp(-127, 127).to(torch.int8)
            for latent in model.encode(x)
        ]

    # Flatten group-major, channel-minor; index 0 selects the single batch item.
    return [
        _channel_to_jpegls(group[0, ch])
        for group in quantized
        for ch in range(group.shape[1])
    ]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def transcode(model, channel_files, device=None):
    """Decode per-channel JPEG-LS bytes through the DNN decoder.

    This is the one-time transcode step: run it once in the cloud after data
    leaves the sensor to obtain a reconstruction in a standard image format.
    Subsequent retrieval (human viewing, ML training) can then read the
    standard output without touching the large decoder again.

    Args:
        model: the autoencoder whose scale structure matches ``encode``. Must
            expose ``decode(latents)`` and ``scale_groups`` (list of
            ``(ps, start, end)`` tuples).
        channel_files: list of bytes as produced by :func:`encode`, ordered by
            scale group (coarsest first), then by channel within each group.
        device: target device for the latents. Defaults to the device of the
            model's first parameter; if the model exposes no parameters the
            latents are left on the CPU.

    Returns:
        The output of ``model.decode`` clamped to [-1, 1] (documented as a
        (1, C, H, W) tensor). Convert to [0, 1] and re-save as PNG/JPEG/...
        for downstream use.

    Raises:
        ValueError: if ``len(channel_files)`` does not equal the total number
            of channels described by ``model.scale_groups``.
    """
    if device is None:
        # Infer the device from the first parameter when there is one. A
        # model that only provides decode/scale_groups (no ``parameters()``,
        # or an empty parameter list) keeps ``device`` as None instead of
        # failing with AttributeError/StopIteration.
        params = getattr(model, "parameters", None)
        first = next(iter(params()), None) if callable(params) else None
        if first is not None:
            device = first.device

    groups = list(model.scale_groups)
    expected = sum(end - start for _, start, end in groups)
    if len(channel_files) != expected:
        raise ValueError(
            f"expected {expected} channel files for model.scale_groups="
            f"{groups}, got {len(channel_files)}"
        )

    latents = []
    idx = 0
    for _ps, start, end in groups:
        n_ch = end - start
        # Consume the next n_ch files; they belong to this scale group.
        planes = [_jpegls_to_channel(blob) for blob in channel_files[idx:idx + n_ch]]
        idx += n_ch
        z = torch.stack(planes, dim=0).unsqueeze(0)  # (1, C_group, H, W)
        if device is not None:
            z = z.to(device)
        latents.append(z)

    # NOTE(review): the latents reach decode() as int8 tensors; confirm that
    # model.decode casts them to its compute dtype.
    with torch.inference_mode():
        xhat = model.decode(latents).clamp(-1, 1)
    return xhat
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seaotter
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
|
|
5
|
+
Author-email: Dan Jacobellis <danjacobellis@utexas.edu>
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Classifier: Programming Language :: Python :: 3
|
|
8
|
+
Classifier: Operating System :: OS Independent
|
|
9
|
+
Requires-Python: >=3.6
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: torch
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
# seaotter
|
|
16
|
+
Sensor-Embedded Asymmetric codec with One-Time Transcode for Efficient Retrieval
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/seaotter/__init__.py
|
|
5
|
+
src/seaotter/transcode.py
|
|
6
|
+
src/seaotter.egg-info/PKG-INFO
|
|
7
|
+
src/seaotter.egg-info/SOURCES.txt
|
|
8
|
+
src/seaotter.egg-info/dependency_links.txt
|
|
9
|
+
src/seaotter.egg-info/requires.txt
|
|
10
|
+
src/seaotter.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
torch
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
seaotter
|