oaknut-codecs 12.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robert Smallshire
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: oaknut-codecs
3
+ Version: 12.7.2
4
+ Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
5
+ Author-email: Robert Smallshire <robert@smallshire.org.uk>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
8
+ Project-URL: Repository, https://github.com/rob-smallshire/oaknut
9
+ Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Dynamic: license-file
19
+
20
+ # oaknut-codecs
21
+
22
+ Text codecs for Acorn computer character sets, part of the
23
+ [oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
24
+ working with Acorn computer filesystems, files, and formats.
25
+
26
+ This package provides a Python codec for the **Acorn/BBC Micro character
27
+ set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
28
+ is the broken bar `¦`. Importing the package registers the codec under
29
+ the name `"acorn"`, so it works with the standard `str.encode` /
30
+ `bytes.decode` machinery:
31
+
32
+ ```python
33
+ import oaknut.codecs # registers the "acorn" codec
34
+
35
+ "COST£100".encode("acorn") # b'COST\x60100'
36
+ b"COST\x60100".decode("acorn") # 'COST£100'
37
+ ```
38
+
39
+ It is the dependency-free bottom layer of the workspace, alongside
40
+ `oaknut-exception`, so that language and file packages can share one
41
+ codec implementation without taking a dependency on each other.
42
+
43
+ ## Installation
44
+
45
+ ```sh
46
+ uv add oaknut-codecs
47
+ ```
48
+
49
+ ## Licence
50
+
51
+ MIT
@@ -0,0 +1,32 @@
1
+ # oaknut-codecs
2
+
3
+ Text codecs for Acorn computer character sets, part of the
4
+ [oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
5
+ working with Acorn computer filesystems, files, and formats.
6
+
7
+ This package provides a Python codec for the **Acorn/BBC Micro character
8
+ set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
9
+ is the broken bar `¦`. Importing the package registers the codec under
10
+ the name `"acorn"`, so it works with the standard `str.encode` /
11
+ `bytes.decode` machinery:
12
+
13
+ ```python
14
+ import oaknut.codecs # registers the "acorn" codec
15
+
16
+ "COST£100".encode("acorn") # b'COST\x60100'
17
+ b"COST\x60100".decode("acorn") # 'COST£100'
18
+ ```
19
+
20
+ It is the dependency-free bottom layer of the workspace, alongside
21
+ `oaknut-exception`, so that language and file packages can share one
22
+ codec implementation without taking a dependency on each other.
23
+
24
+ ## Installation
25
+
26
+ ```sh
27
+ uv add oaknut-codecs
28
+ ```
29
+
30
+ ## Licence
31
+
32
+ MIT
@@ -0,0 +1,46 @@
1
[build-system]
# `license = "MIT"` (an SPDX expression string) and `license-files` in
# [project] follow PEP 639, which setuptools only accepts from 77.0 onwards;
# older releases reject this pyproject.toml when building from the sdist.
requires = ["setuptools>=77.0"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "oaknut-codecs"
7
+ dynamic = ["version"]
8
+ authors = [{ name = "Robert Smallshire", email = "robert@smallshire.org.uk" }]
9
+ description = "Text codecs for Acorn computer character sets (BBC Micro / Electron)"
10
+ readme = "README.md"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ requires-python = ">=3.11"
14
+ classifiers = [
15
+ "Development Status :: 3 - Alpha",
16
+ "Intended Audience :: Developers",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ ]
21
+ # The codecs are implemented entirely against the standard library's
22
+ # `codecs` machinery, so this package has no runtime dependencies. It is
23
+ # the shared bottom layer that lets siblings use the "acorn" encoding
24
+ # without depending on oaknut-file.
25
+ dependencies = []
26
+
27
+ [project.urls]
28
+ Homepage = "https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs"
29
+ Repository = "https://github.com/rob-smallshire/oaknut"
30
+ Issues = "https://github.com/rob-smallshire/oaknut/issues"
31
+
32
+ [dependency-groups]
33
+ test = [
34
+ "pytest>=8.0",
35
+ ]
36
+ dev = [
37
+ "bump-my-version>=0.28.0",
38
+ "pre-commit>=3.0",
39
+ {include-group = "test"},
40
+ ]
41
+
42
+ [tool.setuptools.dynamic]
43
+ version = { attr = "oaknut.codecs.__version__" }
44
+
45
+ [tool.setuptools.packages.find]
46
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,32 @@
1
+ """Text codecs for Acorn computer character sets.
2
+
3
+ This is the bottom layer of the oaknut workspace alongside
4
+ ``oaknut-exception``: a dependency-free home for the character-set codecs
5
+ shared by the file, filesystem and language packages. Splitting the
6
+ codecs out lets siblings such as ``oaknut-basic`` use the ``"acorn"``
7
+ encoding without taking a dependency on ``oaknut-file``.
8
+
9
+ Importing this package registers the ``"acorn"`` codec as a side effect,
10
+ so ``"text".encode("acorn")`` works anywhere in the process once
11
+ ``oaknut.codecs`` (or any package that imports it) has been imported.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from oaknut.codecs.acorn import (
17
+ BBC_MICRO_TO_UNICODE,
18
+ UNICODE_TO_BBC_MICRO,
19
+ AcornCodec,
20
+ acorn_to_unicode,
21
+ unicode_to_acorn,
22
+ )
23
+
24
+ __version__ = "12.7.2"
25
+
26
+ __all__ = [
27
+ "BBC_MICRO_TO_UNICODE",
28
+ "UNICODE_TO_BBC_MICRO",
29
+ "AcornCodec",
30
+ "acorn_to_unicode",
31
+ "unicode_to_acorn",
32
+ ]
@@ -0,0 +1,161 @@
1
+ """Character encoding for the Acorn/BBC Micro character set.
2
+
3
+ The BBC Micro and Acorn Electron used a variant of ASCII with a couple
4
+ of UK-specific characters. This module implements a Python codec for
5
+ that encoding and registers it under the name ``"acorn"``::
6
+
7
+ text = "COST£100"
8
+ data = text.encode("acorn") # b"COST\\x60100"
9
+ back = data.decode("acorn") # "COST£100"
10
+
11
+ Importing this module (or its package, :mod:`oaknut.codecs`) registers
12
+ the codec as a side effect, after which ``"acorn"`` resolves alongside
13
+ the stdlib encodings everywhere in the process.
14
+
15
+ References:
16
+ - https://beebwiki.mdfs.net/ASCII
17
+ - https://www.acornelectron.co.uk/ugs/electron/acorn_computers/ug-english/appendix_f_eng.html
18
+ - https://tobylobster.github.io/mos/mos/S-s4.html
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import codecs
24
+
25
# BBC Micro (MODEs 0-6) character mappings: Acorn byte values that differ
# from ASCII, mapped to the Unicode characters they stand for.
BBC_MICRO_TO_UNICODE = {
    0x60: "£",  # Backtick replaced with pound sign
    0x7C: "¦",  # Vertical bar replaced with broken bar
}

# Reverse mapping for encoding Unicode to BBC Micro bytes.
UNICODE_TO_BBC_MICRO = {v: k for k, v in BBC_MICRO_TO_UNICODE.items()}


def _acorn_byte(char: str) -> int:
    """Return the Acorn byte value for *char*, or -1 if it has none."""
    mapped = UNICODE_TO_BBC_MICRO.get(char)
    if mapped is not None:
        return mapped
    code_point = ord(char)
    # Anything else up to U+00FF passes through as its own code point.
    return code_point if code_point <= 255 else -1


def _apply_error_handler(
    errors: str, error: UnicodeEncodeError, output: bytearray
) -> int:
    """Run the registered error handler *errors* for *error*.

    The handler's replacement is appended to *output* and the input
    position at which encoding should resume is returned.

    Raises:
        ValueError: If no handler is registered under *errors*.
        UnicodeEncodeError: If the handler raises, or if its string
            replacement is itself not encodable.
        IndexError: If the handler returns an out-of-range position.
    """
    try:
        handler = codecs.lookup_error(errors)
    except (LookupError, TypeError):
        # Keep the exception type and message this codec has always used
        # for an unrecognised handler name.
        raise ValueError(f"Unknown error handling: {errors}") from None

    replacement, resume = handler(error)
    if isinstance(replacement, str):
        for char in replacement:
            byte = _acorn_byte(char)
            if byte < 0:
                raise error
            output.append(byte)
    else:
        # Handlers may also return ready-made bytes.
        output.extend(replacement)

    length = len(error.object)
    if resume < 0:
        # Negative positions count back from the end of the input.
        resume += length
    if not 0 <= resume <= length:
        raise IndexError(f"position {resume} from error handler out of bounds")
    return resume


class AcornCodec(codecs.Codec):
    """Codec for the Acorn/BBC Micro character encoding."""

    def encode(self, input: str, errors: str = "strict") -> tuple[bytes, int]:
        """Encode a Unicode string to Acorn bytes.

        Characters in ``UNICODE_TO_BBC_MICRO`` map to their Acorn byte; any
        other character up to U+00FF is emitted as its code point. Anything
        above U+00FF is unencodable and is handled according to *errors*.

        Args:
            input: Unicode string to encode.
            errors: Error handling scheme. 'strict', 'ignore' and 'replace'
                are handled directly; any other name is looked up with
                ``codecs.lookup_error`` (e.g. 'backslashreplace',
                'xmlcharrefreplace').

        Returns:
            Tuple of (encoded bytes, length of input consumed).

        Raises:
            UnicodeEncodeError: If a character cannot be encoded and the
                error scheme does not supply a replacement.
            ValueError: If *errors* names no known error handler and an
                unencodable character is encountered.
        """
        output = bytearray()
        length = len(input)
        position = 0
        while position < length:
            char = input[position]
            byte = _acorn_byte(char)
            if byte >= 0:
                output.append(byte)
                position += 1
                continue

            if errors == "ignore":
                position += 1
                continue
            if errors == "replace":
                output.append(ord("?"))
                position += 1
                continue

            error = UnicodeEncodeError(
                "acorn",
                input,
                position,
                position + 1,
                f"Character '{char}' (U+{ord(char):04X}) cannot be "
                f"encoded in Acorn character set",
            )
            if errors == "strict":
                raise error
            position = _apply_error_handler(errors, error, output)

        return bytes(output), length

    def decode(self, input: bytes, errors: str = "strict") -> tuple[str, int]:
        """Decode Acorn bytes to a Unicode string.

        Bytes in ``BBC_MICRO_TO_UNICODE`` map to their Unicode character;
        every other byte decodes to the code point of the same value.

        Args:
            input: Bytes in Acorn encoding.
            errors: Accepted for codec-interface compatibility; every byte
                value decodes, so it is never consulted.

        Returns:
            Tuple of (decoded string, length of input consumed).
        """
        text = "".join(BBC_MICRO_TO_UNICODE.get(byte, chr(byte)) for byte in input)
        return text, len(input)
95
+
96
+
97
+ # The Acorn codec is byte-for-byte stateless, so a single shared instance
98
+ # backs the incremental encoder and decoder and the non-incremental
99
+ # encode/decode callables. The stream reader/writer inherit from AcornCodec.
100
+ _SHARED_ACORN_CODEC = AcornCodec()
101
+
102
+
103
class AcornIncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder for the Acorn encoding.

    Each chunk is handed straight to the shared codec; nothing is buffered
    between calls and ``final`` is not consulted.
    """

    def encode(self, input: str, final: bool = False) -> bytes:
        """Encode one chunk of text using this encoder's error scheme."""
        encoded, _consumed = _SHARED_ACORN_CODEC.encode(input, self.errors)
        return encoded
108
+
109
+
110
class AcornIncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder for the Acorn encoding.

    Each chunk is handed straight to the shared codec; nothing is buffered
    between calls and ``final`` is not consulted.
    """

    def decode(self, input: bytes, final: bool = False) -> str:
        """Decode one chunk of bytes using this decoder's error scheme."""
        decoded, _consumed = _SHARED_ACORN_CODEC.decode(input, self.errors)
        return decoded
115
+
116
+
117
class AcornStreamWriter(AcornCodec, codecs.StreamWriter):
    """Stream writer for Acorn encoding.

    ``AcornCodec`` is listed before ``codecs.StreamWriter`` in the bases so
    that its ``encode`` is the one found first by method resolution.
    """
119
+
120
+
121
class AcornStreamReader(AcornCodec, codecs.StreamReader):
    """Stream reader for Acorn encoding.

    ``AcornCodec`` is listed before ``codecs.StreamReader`` in the bases so
    that its ``decode`` is the one found first by method resolution.
    """
123
+
124
+
125
def getregentry(name: str | None = None) -> codecs.CodecInfo:
    """Build the ``codecs.CodecInfo`` describing the Acorn encoding.

    Args:
        name: Accepted but not used; the entry is always named ``"acorn"``.

    Returns:
        Registry entry wiring up the stateless encode/decode callables and
        the incremental and stream classes.
    """
    shared = _SHARED_ACORN_CODEC
    entry = codecs.CodecInfo(
        encode=shared.encode,
        decode=shared.decode,
        streamreader=AcornStreamReader,
        streamwriter=AcornStreamWriter,
        incrementalencoder=AcornIncrementalEncoder,
        incrementaldecoder=AcornIncrementalDecoder,
        name="acorn",
    )
    return entry
136
+
137
+
138
def search_function(encoding: str) -> codecs.CodecInfo | None:
    """Codec registry search hook that answers only for ``"acorn"``.

    Args:
        encoding: Encoding name being looked up; compared case-insensitively.

    Returns:
        The Acorn registry entry, or ``None`` so that the registry moves on
        to other search functions.
    """
    if encoding.lower() != "acorn":
        return None
    return getregentry(encoding)
143
+
144
+
145
+ # Register the codec as an import side effect.
146
+ codecs.register(search_function)
147
+
148
+
149
def acorn_to_unicode(data: bytes) -> str:
    """Decode Acorn-encoded bytes to a Unicode string.

    Convenience wrapper around ``data.decode("acorn")``.

    Args:
        data: Bytes in the Acorn encoding.

    Returns:
        The decoded text.
    """
    return data.decode("acorn")
152
+
153
+
154
def unicode_to_acorn(text: str) -> bytes:
    """Encode a Unicode string to Acorn-encoded bytes.

    Convenience wrapper around ``text.encode("acorn")``, which uses the
    default ``"strict"`` error handling.

    Args:
        text: Unicode string to encode.

    Returns:
        The Acorn-encoded bytes.

    Raises:
        UnicodeEncodeError: If *text* contains characters that cannot be
            encoded in the Acorn character set.
    """
    return text.encode("acorn")
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: oaknut-codecs
3
+ Version: 12.7.2
4
+ Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
5
+ Author-email: Robert Smallshire <robert@smallshire.org.uk>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
8
+ Project-URL: Repository, https://github.com/rob-smallshire/oaknut
9
+ Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Dynamic: license-file
19
+
20
+ # oaknut-codecs
21
+
22
+ Text codecs for Acorn computer character sets, part of the
23
+ [oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
24
+ working with Acorn computer filesystems, files, and formats.
25
+
26
+ This package provides a Python codec for the **Acorn/BBC Micro character
27
+ set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
28
+ is the broken bar `¦`. Importing the package registers the codec under
29
+ the name `"acorn"`, so it works with the standard `str.encode` /
30
+ `bytes.decode` machinery:
31
+
32
+ ```python
33
+ import oaknut.codecs # registers the "acorn" codec
34
+
35
+ "COST£100".encode("acorn") # b'COST\x60100'
36
+ b"COST\x60100".decode("acorn") # 'COST£100'
37
+ ```
38
+
39
+ It is the dependency-free bottom layer of the workspace, alongside
40
+ `oaknut-exception`, so that language and file packages can share one
41
+ codec implementation without taking a dependency on each other.
42
+
43
+ ## Installation
44
+
45
+ ```sh
46
+ uv add oaknut-codecs
47
+ ```
48
+
49
+ ## Licence
50
+
51
+ MIT
@@ -0,0 +1,10 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/oaknut/codecs/__init__.py
5
+ src/oaknut/codecs/acorn.py
6
+ src/oaknut_codecs.egg-info/PKG-INFO
7
+ src/oaknut_codecs.egg-info/SOURCES.txt
8
+ src/oaknut_codecs.egg-info/dependency_links.txt
9
+ src/oaknut_codecs.egg-info/top_level.txt
10
+ tests/test_acorn.py
@@ -0,0 +1,139 @@
1
+ """Tests for the Acorn/BBC Micro character codec."""
2
+
3
+ import oaknut.codecs # noqa: F401 - registers the "acorn" codec
4
+ import pytest
5
+ from oaknut.codecs import acorn_to_unicode, unicode_to_acorn
6
+
7
+
8
class TestAcornToUnicode:
    """Tests for decoding Acorn bytes to Unicode."""

    def test_standard_ascii(self):
        assert acorn_to_unicode(b"HELLO") == "HELLO"

    def test_pound_sign_bbc_micro(self):
        assert acorn_to_unicode(b"COST\x60100") == "COST£100"

    def test_broken_bar_bbc_micro(self):
        assert acorn_to_unicode(b"A\x7cB") == "A¦B"

    def test_mixed_characters(self):
        assert acorn_to_unicode(b"PRICE:\x60500") == "PRICE:£500"

    def test_empty_bytes(self):
        assert acorn_to_unicode(b"") == ""

    def test_high_bit_characters(self):
        # High-bit bytes have no Acorn-specific mapping and decode to the
        # code point of the same value; pin the content, not just the length.
        assert acorn_to_unicode(bytes([0x80, 0xFF])) == "\x80\xff"
28
+
29
+
30
class TestUnicodeToAcorn:
    """Tests for encoding Unicode to Acorn bytes."""

    def test_standard_ascii(self):
        assert unicode_to_acorn("HELLO") == b"HELLO"

    def test_pound_sign_bbc_micro(self):
        assert unicode_to_acorn("COST£100") == b"COST\x60100"

    def test_broken_bar_bbc_micro(self):
        assert unicode_to_acorn("A¦B") == b"A\x7cB"

    def test_empty_string(self):
        assert unicode_to_acorn("") == b""

    def test_round_trip_bbc_micro(self):
        original = "FILE£NAME"
        assert acorn_to_unicode(unicode_to_acorn(original)) == original

    # The failure tests assert the documented UnicodeEncodeError rather than
    # its ValueError base, so an unrelated ValueError cannot satisfy them.
    def test_invalid_character_raises(self):
        with pytest.raises(UnicodeEncodeError, match="cannot be encoded"):
            unicode_to_acorn("HELLO\U0001f4be")  # floppy disk emoji

    def test_high_unicode_raises(self):
        with pytest.raises(UnicodeEncodeError, match="cannot be encoded"):
            unicode_to_acorn("TEST™")  # U+2122
56
+
57
+
58
class TestCodecInterface:
    """Exercise the codec through str.encode / bytes.decode and text I/O."""

    def test_encode_with_codec(self):
        encoded = "HELLO".encode("acorn")
        assert encoded == b"HELLO"

    def test_decode_with_codec(self):
        decoded = b"HELLO".decode("acorn")
        assert decoded == "HELLO"

    def test_encode_pound_sign(self):
        encoded = "£100".encode("acorn")
        assert encoded == b"\x60100"

    def test_decode_pound_sign(self):
        decoded = b"\x60100".decode("acorn")
        assert decoded == "£100"

    def test_codec_round_trip(self):
        text = "TEST£FILE"
        encoded = text.encode("acorn")
        assert encoded.decode("acorn") == text

    def test_codec_name_is_case_insensitive(self):
        # Both spellings of the codec name must resolve to the same codec.
        lower = "£".encode("acorn")
        upper = "£".encode("ACORN")
        assert lower == b"\x60"
        assert upper == b"\x60"

    def test_encode_errors_strict(self):
        with pytest.raises(UnicodeEncodeError):
            "TEST™".encode("acorn", errors="strict")

    def test_encode_errors_ignore(self):
        encoded = "TEST™OK".encode("acorn", errors="ignore")
        assert encoded == b"TESTOK"

    def test_encode_errors_replace(self):
        encoded = "TEST™".encode("acorn", errors="replace")
        assert encoded == b"TEST?"

    def test_codec_with_file_like(self):
        import io

        raw = io.BytesIO()

        # Write through a text wrapper, then read the same bytes back.
        sink = io.TextIOWrapper(raw, encoding="acorn")
        sink.write("£100")
        sink.flush()

        raw.seek(0)
        source = io.TextIOWrapper(raw, encoding="acorn")
        assert source.read() == "£100"
102
+
103
+
104
class TestCodecRegistry:
    """Cover codec-registry entry points that str.encode does not reach."""

    def test_codecs_lookup_returns_info(self):
        import codecs

        info = codecs.lookup("acorn")
        assert info.name == "acorn"

    def test_incremental_encoder_streams_chunks(self):
        import codecs

        encoder = codecs.getincrementalencoder("acorn")()
        pieces = [
            encoder.encode("COST"),
            encoder.encode("£"),
            encoder.encode("100", final=True),
        ]
        assert b"".join(pieces) == b"COST\x60100"

    def test_incremental_decoder_streams_chunks(self):
        import codecs

        decoder = codecs.getincrementaldecoder("acorn")()
        pieces = [
            decoder.decode(b"COST"),
            decoder.decode(b"\x60"),
            decoder.decode(b"100", final=True),
        ]
        assert "".join(pieces) == "COST£100"

    def test_incremental_encoder_respects_errors(self):
        import codecs

        encoder_factory = codecs.getincrementalencoder("acorn")
        encoder = encoder_factory(errors="replace")
        assert encoder.encode("AĀB") == b"A?B"

    def test_iterencode_and_iterdecode(self):
        import codecs

        text_chunks = iter(["COST", "£", "100"])
        encoded = b"".join(codecs.iterencode(text_chunks, "acorn"))
        assert encoded == b"COST\x60100"

        byte_chunks = iter([b"COST", b"\x60", b"100"])
        decoded = "".join(codecs.iterdecode(byte_chunks, "acorn"))
        assert decoded == "COST£100"