oaknut-codecs 12.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oaknut/codecs/__init__.py +32 -0
- oaknut/codecs/acorn.py +161 -0
- oaknut_codecs-12.7.2.dist-info/METADATA +51 -0
- oaknut_codecs-12.7.2.dist-info/RECORD +7 -0
- oaknut_codecs-12.7.2.dist-info/WHEEL +5 -0
- oaknut_codecs-12.7.2.dist-info/licenses/LICENSE +21 -0
- oaknut_codecs-12.7.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Text codecs for Acorn computer character sets.
|
|
2
|
+
|
|
3
|
+
This is the bottom layer of the oaknut workspace alongside
|
|
4
|
+
``oaknut-exception``: a dependency-free home for the character-set codecs
|
|
5
|
+
shared by the file, filesystem and language packages. Splitting the
|
|
6
|
+
codecs out lets siblings such as ``oaknut-basic`` use the ``"acorn"``
|
|
7
|
+
encoding without taking a dependency on ``oaknut-file``.
|
|
8
|
+
|
|
9
|
+
Importing this package registers the ``"acorn"`` codec as a side effect,
|
|
10
|
+
so ``"text".encode("acorn")`` works anywhere once any oaknut package has
|
|
11
|
+
been imported.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from oaknut.codecs.acorn import (
|
|
17
|
+
BBC_MICRO_TO_UNICODE,
|
|
18
|
+
UNICODE_TO_BBC_MICRO,
|
|
19
|
+
AcornCodec,
|
|
20
|
+
acorn_to_unicode,
|
|
21
|
+
unicode_to_acorn,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Package version string; the same value appears as ``Version`` in the
# distribution's METADATA.
__version__ = "12.7.2"

# Public API of the package: exactly the five names re-exported from
# ``oaknut.codecs.acorn`` by the import statement above.
__all__ = [
    "BBC_MICRO_TO_UNICODE",
    "UNICODE_TO_BBC_MICRO",
    "AcornCodec",
    "acorn_to_unicode",
    "unicode_to_acorn",
]
|
oaknut/codecs/acorn.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Character encoding for the Acorn/BBC Micro character set.
|
|
2
|
+
|
|
3
|
+
The BBC Micro and Acorn Electron used a variant of ASCII with a couple
|
|
4
|
+
of UK-specific characters. This module implements a Python codec for
|
|
5
|
+
that encoding and registers it under the name ``"acorn"``::
|
|
6
|
+
|
|
7
|
+
text = "COST£100"
|
|
8
|
+
data = text.encode("acorn") # b"COST\\x60100"
|
|
9
|
+
back = data.decode("acorn") # "COST£100"
|
|
10
|
+
|
|
11
|
+
Importing this module (or its package, :mod:`oaknut.codecs`) registers
|
|
12
|
+
the codec as a side effect, after which ``"acorn"`` resolves alongside
|
|
13
|
+
the stdlib encodings everywhere in the process.
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
- https://beebwiki.mdfs.net/ASCII
|
|
17
|
+
- https://www.acornelectron.co.uk/ugs/electron/acorn_computers/ug-english/appendix_f_eng.html
|
|
18
|
+
- https://tobylobster.github.io/mos/mos/S-s4.html
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import codecs
|
|
24
|
+
|
|
25
|
+
# BBC Micro (MODEs 0-6) character mappings: Acorn byte values that differ
# from ASCII, mapped to the Unicode characters they stand for.
BBC_MICRO_TO_UNICODE = {
    0x60: "£",  # Backtick replaced with pound sign
    0x7C: "¦",  # Vertical bar replaced with broken bar
}

# Reverse mapping for encoding Unicode to BBC Micro bytes.
UNICODE_TO_BBC_MICRO = {v: k for k, v in BBC_MICRO_TO_UNICODE.items()}


class AcornCodec(codecs.Codec):
    """Codec for the Acorn/BBC Micro character encoding.

    The codec holds no state: both methods depend only on their arguments
    and the module-level mapping tables.
    """

    @staticmethod
    def _byte_for(char: str) -> int | None:
        """Return the Acorn byte for *char*, or ``None`` if it has none."""
        mapped = UNICODE_TO_BBC_MICRO.get(char)
        if mapped is not None:
            return mapped
        code_point = ord(char)
        # Anything else that fits in one byte passes through unchanged.
        # This includes "`" (0x60) and "|" (0x7C), whose bytes decode back
        # to the pound sign and broken bar respectively.
        return code_point if code_point <= 255 else None

    def encode(self, input: str, errors: str = "strict") -> tuple[bytes, int]:
        """Encode a Unicode string to Acorn bytes.

        Args:
            input: Unicode string to encode.
            errors: Name of the error handler applied to characters above
                U+00FF that have no Acorn mapping. ``'strict'`` raises;
                any other name is resolved with :func:`codecs.lookup_error`,
                so ``'ignore'``, ``'replace'``, ``'xmlcharrefreplace'``,
                ``'backslashreplace'`` and custom registered handlers work.

        Returns:
            Tuple of (encoded bytes, length of input consumed).

        Raises:
            UnicodeEncodeError: In strict mode, or when the handler itself
                re-raises or supplies replacement text that cannot be
                encoded either.
            ValueError: If *errors* does not name a registered handler.
                Only raised once an unencodable character is met.
            IndexError: If a handler returns an out-of-range resume position.
        """
        output = bytearray()
        length = len(input)
        position = 0
        while position < length:
            char = input[position]
            byte = AcornCodec._byte_for(char)
            if byte is not None:
                output.append(byte)
                position += 1
                continue

            error = UnicodeEncodeError(
                "acorn",
                input,
                position,
                position + 1,
                f"Character '{char}' (U+{ord(char):04X}) cannot be "
                f"encoded in Acorn character set",
            )
            if errors == "strict":
                raise error
            try:
                handler = codecs.lookup_error(errors)
            except LookupError:
                # Keep the exception type this codec has always raised for
                # unrecognised handler names.
                raise ValueError(f"Unknown error handling: {errors}") from None

            replacement, resume = handler(error)
            if isinstance(replacement, str):
                for substitute in replacement:
                    byte = AcornCodec._byte_for(substitute)
                    if byte is None:
                        # The replacement text is itself unencodable.
                        raise error
                    output.append(byte)
            else:
                # Handlers may also return ready-made bytes.
                output.extend(replacement)

            # Handler protocol: a negative position counts from the end.
            if resume < 0:
                resume += length
            if not 0 <= resume <= length:
                raise IndexError(
                    f"position {resume} from error handler out of bounds"
                )
            position = resume

        return bytes(output), length

    def decode(self, input: bytes, errors: str = "strict") -> tuple[str, int]:
        """Decode Acorn bytes to a Unicode string.

        Args:
            input: Bytes in Acorn encoding.
            errors: Accepted for codec-protocol compatibility. Every byte
                value decodes to a character, so it is never consulted.

        Returns:
            Tuple of (decoded string, length of input consumed).
        """
        output = []
        for byte in input:
            if byte in BBC_MICRO_TO_UNICODE:
                output.append(BBC_MICRO_TO_UNICODE[byte])
            else:
                # Standard ASCII or high-bit characters.
                output.append(chr(byte))

        return "".join(output), len(input)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# AcornCodec keeps no per-instance state, so a single shared instance serves
# the incremental encoder and decoder and the non-incremental encode/decode
# callables of the registry entry. The stream reader and writer do not use
# this instance: they inherit encode/decode from AcornCodec directly.
_SHARED_ACORN_CODEC = AcornCodec()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AcornIncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder for Acorn encoding.

    Each chunk is encoded independently by the shared module-level codec,
    using the error mode stored on this encoder as ``self.errors``.
    """

    def encode(self, input: str, final: bool = False) -> bytes:
        """Encode one chunk of text; *final* is accepted but not used."""
        encoded, _consumed = _SHARED_ACORN_CODEC.encode(input, self.errors)
        return encoded
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class AcornIncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder for Acorn encoding.

    Each chunk is decoded independently by the shared module-level codec,
    using the error mode stored on this decoder as ``self.errors``.
    """

    def decode(self, input: bytes, final: bool = False) -> str:
        """Decode one chunk of bytes; *final* is accepted but not used."""
        decoded, _consumed = _SHARED_ACORN_CODEC.decode(input, self.errors)
        return decoded
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class AcornStreamWriter(AcornCodec, codecs.StreamWriter):
    """Stream writer for Acorn encoding.

    Defines nothing of its own. ``AcornCodec`` is listed first in the
    bases, so its ``encode`` is the one resolved; the remaining behaviour
    comes from :class:`codecs.StreamWriter`.
    """
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class AcornStreamReader(AcornCodec, codecs.StreamReader):
    """Stream reader for Acorn encoding.

    Defines nothing of its own. ``AcornCodec`` is listed first in the
    bases, so its ``decode`` is the one resolved; the remaining behaviour
    comes from :class:`codecs.StreamReader`.
    """
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def getregentry(name: str | None = None) -> codecs.CodecInfo:
    """Build the ``codecs.CodecInfo`` describing the Acorn encoding.

    Args:
        name: Accepted but not used; the entry is always named ``"acorn"``.

    Returns:
        A registry entry whose stateless encode/decode callables are the
        bound methods of the shared codec instance, together with the
        incremental and stream classes defined in this module.
    """
    shared = _SHARED_ACORN_CODEC
    return codecs.CodecInfo(
        shared.encode,
        shared.decode,
        streamreader=AcornStreamReader,
        streamwriter=AcornStreamWriter,
        incrementalencoder=AcornIncrementalEncoder,
        incrementaldecoder=AcornIncrementalDecoder,
        name="acorn",
    )
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def search_function(encoding: str) -> codecs.CodecInfo | None:
    """Resolve the ``"acorn"`` encoding name for the codec registry.

    Args:
        encoding: Encoding name being looked up; compared case-insensitively.

    Returns:
        The Acorn registry entry when *encoding* is ``"acorn"``, otherwise
        ``None``.
    """
    if encoding.lower() != "acorn":
        return None
    return getregentry(encoding)


# Importing this module installs the search function in the codec registry.
codecs.register(search_function)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def acorn_to_unicode(data: bytes) -> str:
    """Decode Acorn-encoded bytes to a Unicode string.

    Thin wrapper around ``data.decode("acorn")`` with the default error
    handling. It relies on the ``"acorn"`` codec that this module registers
    on import.

    Args:
        data: Bytes in the Acorn encoding.

    Returns:
        The decoded text.
    """
    return data.decode("acorn")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def unicode_to_acorn(text: str) -> bytes:
    """Encode a Unicode string to Acorn-encoded bytes.

    Thin wrapper around ``text.encode("acorn")`` with the default (strict)
    error handling. It relies on the ``"acorn"`` codec that this module
    registers on import.

    Args:
        text: Unicode string to encode.

    Returns:
        The encoded bytes.

    Raises:
        UnicodeEncodeError: If *text* contains characters that cannot be
            encoded in the Acorn character set.
    """
    return text.encode("acorn")
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oaknut-codecs
|
|
3
|
+
Version: 12.7.2
|
|
4
|
+
Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
|
|
5
|
+
Author-email: Robert Smallshire <robert@smallshire.org.uk>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
|
|
8
|
+
Project-URL: Repository, https://github.com/rob-smallshire/oaknut
|
|
9
|
+
Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# oaknut-codecs
|
|
21
|
+
|
|
22
|
+
Text codecs for Acorn computer character sets, part of the
|
|
23
|
+
[oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
|
|
24
|
+
working with Acorn computer filesystems, files, and formats.
|
|
25
|
+
|
|
26
|
+
This package provides a Python codec for the **Acorn/BBC Micro character
|
|
27
|
+
set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
|
|
28
|
+
is the broken bar `¦`. Importing the package registers the codec under
|
|
29
|
+
the name `"acorn"`, so it works with the standard `str.encode` /
|
|
30
|
+
`bytes.decode` machinery:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import oaknut.codecs # registers the "acorn" codec
|
|
34
|
+
|
|
35
|
+
"COST£100".encode("acorn") # b'COST\x60100'
|
|
36
|
+
b"COST\x60100".decode("acorn") # 'COST£100'
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
It is the dependency-free bottom layer of the workspace, alongside
|
|
40
|
+
`oaknut-exception`, so that language and file packages can share one
|
|
41
|
+
codec implementation without taking a dependency on each other.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```sh
|
|
46
|
+
uv add oaknut-codecs
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Licence
|
|
50
|
+
|
|
51
|
+
MIT
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
oaknut/codecs/__init__.py,sha256=db6OPz9MkxKhHGIhLMcxTfrlsiKGRf9WYK9xFvCYNvs,891
|
|
2
|
+
oaknut/codecs/acorn.py,sha256=_Zi0fdGZWun7oSU2et-iX89EIfEP2XcRMxoHq908q4E,5521
|
|
3
|
+
oaknut_codecs-12.7.2.dist-info/licenses/LICENSE,sha256=L_Uw2MQC3xsCy2nOzWmY_DYRQMHC-Yu2_zTvUTadNAY,1074
|
|
4
|
+
oaknut_codecs-12.7.2.dist-info/METADATA,sha256=rOPon958hHnD0EIVpSggi8euMzFxLd_TZRpZKNEgXXc,1756
|
|
5
|
+
oaknut_codecs-12.7.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
6
|
+
oaknut_codecs-12.7.2.dist-info/top_level.txt,sha256=QLKweXQ1HlrA4vuJ4fj2AKq-_qs54zd2M4O0DahuKzw,7
|
|
7
|
+
oaknut_codecs-12.7.2.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Robert Smallshire
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
oaknut
|