oaknut-codecs 12.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,32 @@
1
+ """Text codecs for Acorn computer character sets.
2
+
3
+ This is the bottom layer of the oaknut workspace alongside
4
+ ``oaknut-exception``: a dependency-free home for the character-set codecs
5
+ shared by the file, filesystem and language packages. Splitting the
6
+ codecs out lets siblings such as ``oaknut-basic`` use the ``"acorn"``
7
+ encoding without taking a dependency on ``oaknut-file``.
8
+
9
+ Importing this package registers the ``"acorn"`` codec as a side effect,
10
+ so ``"text".encode("acorn")`` works anywhere in the process once this
11
+ package, or any oaknut package that imports it, has been imported.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from oaknut.codecs.acorn import (
17
+ BBC_MICRO_TO_UNICODE,
18
+ UNICODE_TO_BBC_MICRO,
19
+ AcornCodec,
20
+ acorn_to_unicode,
21
+ unicode_to_acorn,
22
+ )
23
+
24
# Version string of this package.
__version__ = "12.7.2"

# Public API of the package: exactly the names re-exported above from
# ``oaknut.codecs.acorn``.
__all__ = [
    "BBC_MICRO_TO_UNICODE",
    "UNICODE_TO_BBC_MICRO",
    "AcornCodec",
    "acorn_to_unicode",
    "unicode_to_acorn",
]
oaknut/codecs/acorn.py ADDED
@@ -0,0 +1,161 @@
1
+ """Character encoding for the Acorn/BBC Micro character set.
2
+
3
+ The BBC Micro and Acorn Electron used a variant of ASCII with a couple
4
+ of UK-specific characters. This module implements a Python codec for
5
+ that encoding and registers it under the name ``"acorn"``::
6
+
7
+ text = "COST£100"
8
+ data = text.encode("acorn") # b"COST\\x60100"
9
+ back = data.decode("acorn") # "COST£100"
10
+
11
+ Importing this module (or its package, :mod:`oaknut.codecs`) registers
12
+ the codec as a side effect, after which ``"acorn"`` resolves alongside
13
+ the stdlib encodings everywhere in the process.
14
+
15
+ References:
16
+ - https://beebwiki.mdfs.net/ASCII
17
+ - https://www.acornelectron.co.uk/ugs/electron/acorn_computers/ug-english/appendix_f_eng.html
18
+ - https://tobylobster.github.io/mos/mos/S-s4.html
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import codecs
24
+
25
+ # BBC Micro (MODEs 0-6) character mappings: Acorn byte values that differ
26
+ # from ASCII, mapped to the Unicode characters they stand for.
27
+ BBC_MICRO_TO_UNICODE = {
28
+ 0x60: "£", # Backtick replaced with pound sign
29
+ 0x7C: "¦", # Vertical bar replaced with broken bar
30
+ }
31
+
32
+ # Reverse mapping for encoding Unicode to BBC Micro bytes.
33
+ UNICODE_TO_BBC_MICRO = {v: k for k, v in BBC_MICRO_TO_UNICODE.items()}
34
+
35
+
36
+ class AcornCodec(codecs.Codec):
37
+ """Codec for the Acorn/BBC Micro character encoding."""
38
+
39
+ def encode(self, input: str, errors: str = "strict") -> tuple[bytes, int]:
40
+ """Encode a Unicode string to Acorn bytes.
41
+
42
+ Args:
43
+ input: Unicode string to encode.
44
+ errors: Error handling ('strict', 'ignore', 'replace').
45
+
46
+ Returns:
47
+ Tuple of (encoded bytes, length of input consumed).
48
+ """
49
+ output = bytearray()
50
+ for i, char in enumerate(input):
51
+ if char in UNICODE_TO_BBC_MICRO:
52
+ output.append(UNICODE_TO_BBC_MICRO[char])
53
+ else:
54
+ code_point = ord(char)
55
+ if code_point > 255:
56
+ if errors == "strict":
57
+ raise UnicodeEncodeError(
58
+ "acorn",
59
+ input,
60
+ i,
61
+ i + 1,
62
+ f"Character '{char}' (U+{code_point:04X}) cannot be "
63
+ f"encoded in Acorn character set",
64
+ )
65
+ elif errors == "ignore":
66
+ continue
67
+ elif errors == "replace":
68
+ output.append(ord("?"))
69
+ else:
70
+ raise ValueError(f"Unknown error handling: {errors}")
71
+ else:
72
+ output.append(code_point)
73
+
74
+ return bytes(output), len(input)
75
+
76
+ def decode(self, input: bytes, errors: str = "strict") -> tuple[str, int]:
77
+ """Decode Acorn bytes to a Unicode string.
78
+
79
+ Args:
80
+ input: Bytes in Acorn encoding.
81
+ errors: Error handling ('strict', 'ignore', 'replace').
82
+
83
+ Returns:
84
+ Tuple of (decoded string, length of input consumed).
85
+ """
86
+ output = []
87
+ for byte in input:
88
+ if byte in BBC_MICRO_TO_UNICODE:
89
+ output.append(BBC_MICRO_TO_UNICODE[byte])
90
+ else:
91
+ # Standard ASCII or high-bit characters.
92
+ output.append(chr(byte))
93
+
94
+ return "".join(output), len(input)
95
+
96
+
97
# AcornCodec keeps no per-instance state, so a single shared instance
# serves the incremental encoder and decoder and supplies the
# non-incremental encode/decode callables handed to the codec registry.
# The stream reader and writer do not use it: they inherit encode/decode
# from AcornCodec directly.
_SHARED_ACORN_CODEC = AcornCodec()
101
+
102
+
103
class AcornIncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder for Acorn encoding.

    Each chunk is passed to the shared codec and encoded on its own;
    nothing is buffered between calls.
    """

    def encode(self, input: str, final: bool = False) -> bytes:
        """Encode one chunk using this encoder's error policy.

        *final* is accepted for the incremental-codec interface but is
        not used.
        """
        encoded, _consumed = _SHARED_ACORN_CODEC.encode(input, self.errors)
        return encoded
108
+
109
+
110
class AcornIncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder for Acorn encoding.

    Each chunk is passed to the shared codec and decoded on its own;
    nothing is buffered between calls.
    """

    def decode(self, input: bytes, final: bool = False) -> str:
        """Decode one chunk using this decoder's error policy.

        *final* is accepted for the incremental-codec interface but is
        not used.
        """
        decoded, _consumed = _SHARED_ACORN_CODEC.decode(input, self.errors)
        return decoded
115
+
116
+
117
class AcornStreamWriter(AcornCodec, codecs.StreamWriter):
    """Stream writer for Acorn encoding.

    ``AcornCodec`` is listed first so that its ``encode`` is the one found
    on this class; the stream handling comes from ``codecs.StreamWriter``.
    """
119
+
120
+
121
class AcornStreamReader(AcornCodec, codecs.StreamReader):
    """Stream reader for Acorn encoding.

    ``AcornCodec`` is listed first so that its ``decode`` is the one found
    on this class; the stream handling comes from ``codecs.StreamReader``.
    """
123
+
124
+
125
def getregentry(name: str | None = None) -> codecs.CodecInfo:
    """Build the ``CodecInfo`` describing the Acorn encoding.

    Args:
        name: Ignored; the entry is always registered as ``"acorn"``.

    Returns:
        A ``CodecInfo`` whose stateless encode/decode callables come from
        the shared codec instance, together with the incremental and
        stream classes defined in this module.
    """
    shared = _SHARED_ACORN_CODEC
    return codecs.CodecInfo(
        name="acorn",
        encode=shared.encode,
        decode=shared.decode,
        incrementalencoder=AcornIncrementalEncoder,
        incrementaldecoder=AcornIncrementalDecoder,
        streamreader=AcornStreamReader,
        streamwriter=AcornStreamWriter,
    )
136
+
137
+
138
def search_function(encoding: str) -> codecs.CodecInfo | None:
    """Resolve the ``"acorn"`` encoding name for the codec registry.

    Args:
        encoding: Encoding name being looked up; compared
            case-insensitively.

    Returns:
        The Acorn ``CodecInfo`` when *encoding* is ``"acorn"``, otherwise
        ``None``.
    """
    if encoding.lower() != "acorn":
        return None
    return getregentry(encoding)
143
+
144
+
145
# Register the codec as an import side effect: once this module has been
# imported, codec lookups for "acorn" are answered by search_function.
codecs.register(search_function)
147
+
148
+
149
def acorn_to_unicode(data: bytes) -> str:
    """Return *data* decoded from the Acorn encoding.

    Convenience wrapper around ``data.decode("acorn")``.
    """
    text = data.decode(encoding="acorn")
    return text
152
+
153
+
154
def unicode_to_acorn(text: str) -> bytes:
    """Return *text* encoded to Acorn bytes.

    Convenience wrapper around ``text.encode("acorn")`` with the default
    (strict) error handling.

    Raises:
        UnicodeEncodeError: If *text* contains characters that cannot be
            encoded in the Acorn character set.
    """
    data = text.encode(encoding="acorn")
    return data
@@ -0,0 +1,51 @@
1
+ Metadata-Version: 2.4
2
+ Name: oaknut-codecs
3
+ Version: 12.7.2
4
+ Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
5
+ Author-email: Robert Smallshire <robert@smallshire.org.uk>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
8
+ Project-URL: Repository, https://github.com/rob-smallshire/oaknut
9
+ Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Dynamic: license-file
19
+
20
+ # oaknut-codecs
21
+
22
+ Text codecs for Acorn computer character sets, part of the
23
+ [oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
24
+ working with Acorn computer filesystems, files, and formats.
25
+
26
+ This package provides a Python codec for the **Acorn/BBC Micro character
27
+ set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
28
+ is the broken bar `¦`. Importing the package registers the codec under
29
+ the name `"acorn"`, so it works with the standard `str.encode` /
30
+ `bytes.decode` machinery:
31
+
32
+ ```python
33
+ import oaknut.codecs # registers the "acorn" codec
34
+
35
+ "COST£100".encode("acorn") # b'COST\x60100'
36
+ b"COST\x60100".decode("acorn") # 'COST£100'
37
+ ```
38
+
39
+ It is the dependency-free bottom layer of the workspace, alongside
40
+ `oaknut-exception`, so that language and file packages can share one
41
+ codec implementation without taking a dependency on each other.
42
+
43
+ ## Installation
44
+
45
+ ```sh
46
+ uv add oaknut-codecs
47
+ ```
48
+
49
+ ## Licence
50
+
51
+ MIT
@@ -0,0 +1,7 @@
1
+ oaknut/codecs/__init__.py,sha256=db6OPz9MkxKhHGIhLMcxTfrlsiKGRf9WYK9xFvCYNvs,891
2
+ oaknut/codecs/acorn.py,sha256=_Zi0fdGZWun7oSU2et-iX89EIfEP2XcRMxoHq908q4E,5521
3
+ oaknut_codecs-12.7.2.dist-info/licenses/LICENSE,sha256=L_Uw2MQC3xsCy2nOzWmY_DYRQMHC-Yu2_zTvUTadNAY,1074
4
+ oaknut_codecs-12.7.2.dist-info/METADATA,sha256=rOPon958hHnD0EIVpSggi8euMzFxLd_TZRpZKNEgXXc,1756
5
+ oaknut_codecs-12.7.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ oaknut_codecs-12.7.2.dist-info/top_level.txt,sha256=QLKweXQ1HlrA4vuJ4fj2AKq-_qs54zd2M4O0DahuKzw,7
7
+ oaknut_codecs-12.7.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Robert Smallshire
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ oaknut