oaknut-codecs 12.7.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- oaknut_codecs-12.7.2/LICENSE +21 -0
- oaknut_codecs-12.7.2/PKG-INFO +51 -0
- oaknut_codecs-12.7.2/README.md +32 -0
- oaknut_codecs-12.7.2/pyproject.toml +46 -0
- oaknut_codecs-12.7.2/setup.cfg +4 -0
- oaknut_codecs-12.7.2/src/oaknut/codecs/__init__.py +32 -0
- oaknut_codecs-12.7.2/src/oaknut/codecs/acorn.py +161 -0
- oaknut_codecs-12.7.2/src/oaknut_codecs.egg-info/PKG-INFO +51 -0
- oaknut_codecs-12.7.2/src/oaknut_codecs.egg-info/SOURCES.txt +10 -0
- oaknut_codecs-12.7.2/src/oaknut_codecs.egg-info/dependency_links.txt +1 -0
- oaknut_codecs-12.7.2/src/oaknut_codecs.egg-info/top_level.txt +1 -0
- oaknut_codecs-12.7.2/tests/test_acorn.py +139 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Robert Smallshire
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oaknut-codecs
|
|
3
|
+
Version: 12.7.2
|
|
4
|
+
Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
|
|
5
|
+
Author-email: Robert Smallshire <robert@smallshire.org.uk>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
|
|
8
|
+
Project-URL: Repository, https://github.com/rob-smallshire/oaknut
|
|
9
|
+
Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# oaknut-codecs
|
|
21
|
+
|
|
22
|
+
Text codecs for Acorn computer character sets, part of the
|
|
23
|
+
[oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
|
|
24
|
+
working with Acorn computer filesystems, files, and formats.
|
|
25
|
+
|
|
26
|
+
This package provides a Python codec for the **Acorn/BBC Micro character
|
|
27
|
+
set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
|
|
28
|
+
is the broken bar `¦`. Importing the package registers the codec under
|
|
29
|
+
the name `"acorn"`, so it works with the standard `str.encode` /
|
|
30
|
+
`bytes.decode` machinery:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import oaknut.codecs # registers the "acorn" codec
|
|
34
|
+
|
|
35
|
+
"COST£100".encode("acorn") # b'COST\x60100'
|
|
36
|
+
b"COST\x60100".decode("acorn") # 'COST£100'
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
It is the dependency-free bottom layer of the workspace, alongside
|
|
40
|
+
`oaknut-exception`, so that language and file packages can share one
|
|
41
|
+
codec implementation without taking a dependency on each other.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```sh
|
|
46
|
+
uv add oaknut-codecs
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Licence
|
|
50
|
+
|
|
51
|
+
MIT
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# oaknut-codecs
|
|
2
|
+
|
|
3
|
+
Text codecs for Acorn computer character sets, part of the
|
|
4
|
+
[oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
|
|
5
|
+
working with Acorn computer filesystems, files, and formats.
|
|
6
|
+
|
|
7
|
+
This package provides a Python codec for the **Acorn/BBC Micro character
|
|
8
|
+
set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
|
|
9
|
+
is the broken bar `¦`. Importing the package registers the codec under
|
|
10
|
+
the name `"acorn"`, so it works with the standard `str.encode` /
|
|
11
|
+
`bytes.decode` machinery:
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import oaknut.codecs # registers the "acorn" codec
|
|
15
|
+
|
|
16
|
+
"COST£100".encode("acorn") # b'COST\x60100'
|
|
17
|
+
b"COST\x60100".decode("acorn") # 'COST£100'
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
It is the dependency-free bottom layer of the workspace, alongside
|
|
21
|
+
`oaknut-exception`, so that language and file packages can share one
|
|
22
|
+
codec implementation without taking a dependency on each other.
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```sh
|
|
27
|
+
uv add oaknut-codecs
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Licence
|
|
31
|
+
|
|
32
|
+
MIT
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
# The SPDX `license = "MIT"` string and the `license-files` key in [project]
# (PEP 639) are only understood by setuptools 77 and later.
requires = ["setuptools>=77.0.3"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "oaknut-codecs"
|
|
7
|
+
dynamic = ["version"]
|
|
8
|
+
authors = [{ name = "Robert Smallshire", email = "robert@smallshire.org.uk" }]
|
|
9
|
+
description = "Text codecs for Acorn computer character sets (BBC Micro / Electron)"
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = "MIT"
|
|
12
|
+
license-files = ["LICENSE"]
|
|
13
|
+
requires-python = ">=3.11"
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3.11",
|
|
18
|
+
"Programming Language :: Python :: 3.12",
|
|
19
|
+
"Programming Language :: Python :: 3.13",
|
|
20
|
+
]
|
|
21
|
+
# The codecs are implemented entirely against the standard library's
|
|
22
|
+
# `codecs` machinery, so this package has no runtime dependencies. It is
|
|
23
|
+
# the shared bottom layer that lets siblings use the "acorn" encoding
|
|
24
|
+
# without depending on oaknut-file.
|
|
25
|
+
dependencies = []
|
|
26
|
+
|
|
27
|
+
[project.urls]
|
|
28
|
+
Homepage = "https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs"
|
|
29
|
+
Repository = "https://github.com/rob-smallshire/oaknut"
|
|
30
|
+
Issues = "https://github.com/rob-smallshire/oaknut/issues"
|
|
31
|
+
|
|
32
|
+
[dependency-groups]
|
|
33
|
+
test = [
|
|
34
|
+
"pytest>=8.0",
|
|
35
|
+
]
|
|
36
|
+
dev = [
|
|
37
|
+
"bump-my-version>=0.28.0",
|
|
38
|
+
"pre-commit>=3.0",
|
|
39
|
+
{include-group = "test"},
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[tool.setuptools.dynamic]
|
|
43
|
+
version = { attr = "oaknut.codecs.__version__" }
|
|
44
|
+
|
|
45
|
+
[tool.setuptools.packages.find]
|
|
46
|
+
where = ["src"]
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Text codecs for Acorn computer character sets.
|
|
2
|
+
|
|
3
|
+
This is the bottom layer of the oaknut workspace alongside
|
|
4
|
+
``oaknut-exception``: a dependency-free home for the character-set codecs
|
|
5
|
+
shared by the file, filesystem and language packages. Splitting the
|
|
6
|
+
codecs out lets siblings such as ``oaknut-basic`` use the ``"acorn"``
|
|
7
|
+
encoding without taking a dependency on ``oaknut-file``.
|
|
8
|
+
|
|
9
|
+
Importing this package registers the ``"acorn"`` codec as a side effect,
|
|
10
|
+
so ``"text".encode("acorn")`` works anywhere once any oaknut package has
|
|
11
|
+
been imported.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from oaknut.codecs.acorn import (
|
|
17
|
+
BBC_MICRO_TO_UNICODE,
|
|
18
|
+
UNICODE_TO_BBC_MICRO,
|
|
19
|
+
AcornCodec,
|
|
20
|
+
acorn_to_unicode,
|
|
21
|
+
unicode_to_acorn,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Package version. pyproject.toml declares the project version as dynamic and
# reads it from this attribute (`attr = "oaknut.codecs.__version__"`).
__version__ = "12.7.2"
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"BBC_MICRO_TO_UNICODE",
|
|
28
|
+
"UNICODE_TO_BBC_MICRO",
|
|
29
|
+
"AcornCodec",
|
|
30
|
+
"acorn_to_unicode",
|
|
31
|
+
"unicode_to_acorn",
|
|
32
|
+
]
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
"""Character encoding for the Acorn/BBC Micro character set.
|
|
2
|
+
|
|
3
|
+
The BBC Micro and Acorn Electron used a variant of ASCII with a couple
|
|
4
|
+
of UK-specific characters. This module implements a Python codec for
|
|
5
|
+
that encoding and registers it under the name ``"acorn"``::
|
|
6
|
+
|
|
7
|
+
text = "COST£100"
|
|
8
|
+
data = text.encode("acorn") # b"COST\\x60100"
|
|
9
|
+
back = data.decode("acorn") # "COST£100"
|
|
10
|
+
|
|
11
|
+
Importing this module (or its package, :mod:`oaknut.codecs`) registers
|
|
12
|
+
the codec as a side effect, after which ``"acorn"`` resolves alongside
|
|
13
|
+
the stdlib encodings everywhere in the process.
|
|
14
|
+
|
|
15
|
+
References:
|
|
16
|
+
- https://beebwiki.mdfs.net/ASCII
|
|
17
|
+
- https://www.acornelectron.co.uk/ugs/electron/acorn_computers/ug-english/appendix_f_eng.html
|
|
18
|
+
- https://tobylobster.github.io/mos/mos/S-s4.html
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import codecs
|
|
24
|
+
|
|
25
|
+
# BBC Micro (MODEs 0-6) character mappings: Acorn byte values that differ
|
|
26
|
+
# from ASCII, mapped to the Unicode characters they stand for.
|
|
27
|
+
BBC_MICRO_TO_UNICODE = {
    0x60: "£",  # Backtick replaced with pound sign
    0x7C: "¦",  # Vertical bar replaced with broken bar
}

# Reverse mapping for encoding Unicode to BBC Micro bytes.
UNICODE_TO_BBC_MICRO = {v: k for k, v in BBC_MICRO_TO_UNICODE.items()}


class AcornCodec(codecs.Codec):
    """Stateless codec for the Acorn/BBC Micro character encoding.

    A byte decodes to the Unicode code point of the same value, except for
    the entries in ``BBC_MICRO_TO_UNICODE``. Encoding applies the reverse
    table first; any other character up to U+00FF is written as the byte of
    the same value.

    Note:
        The mapping is not a strict bijection. "`" (U+0060) and "|"
        (U+007C) are not rejected: they encode to bytes 0x60 and 0x7C,
        which decode as "£" and "¦". Likewise bytes 0xA3 and 0xA6 decode
        to "£" and "¦", which re-encode as 0x60 and 0x7C.
    """

    def encode(self, input: str, errors: str = "strict") -> tuple[bytes, int]:
        """Encode a Unicode string to Acorn bytes.

        Args:
            input: Unicode string to encode.
            errors: Name of the error handler applied to characters above
                U+00FF. Any handler known to :func:`codecs.lookup_error`
                is accepted, e.g. ``'strict'``, ``'ignore'``, ``'replace'``,
                ``'backslashreplace'`` or ``'xmlcharrefreplace'``.

        Returns:
            Tuple of (encoded bytes, length of input consumed).

        Raises:
            UnicodeEncodeError: If a character cannot be encoded and the
                handler re-raises (as ``'strict'`` does).
            ValueError: If *errors* does not name a registered handler.
                The handler is looked up lazily, so this is only raised
                once an unencodable character is actually met.
        """
        output = bytearray()
        length = len(input)
        handler = None  # Looked up lazily, on the first unencodable character.
        pos = 0
        while pos < length:
            char = input[pos]
            byte = UNICODE_TO_BBC_MICRO.get(char)
            if byte is None:
                code_point = ord(char)
                if code_point <= 0xFF:
                    byte = code_point
            if byte is not None:
                output.append(byte)
                pos += 1
                continue

            if handler is None:
                try:
                    handler = codecs.lookup_error(errors)
                except (LookupError, TypeError):
                    # Keep the exception type this codec has always raised
                    # for an unrecognised handler name.
                    raise ValueError(f"Unknown error handling: {errors}") from None

            error = UnicodeEncodeError(
                "acorn",
                input,
                pos,
                pos + 1,
                f"Character '{char}' (U+{code_point:04X}) cannot be "
                f"encoded in Acorn character set",
            )
            # Handlers either raise or return (replacement, resume position).
            replacement, resume = handler(error)
            if isinstance(replacement, str):
                # Replacement text must itself be encodable.
                replacement = self.encode(replacement, "strict")[0]
            output += replacement

            # A negative resume position counts back from the end of input.
            if resume < 0:
                resume += length
            if not 0 <= resume <= length:
                raise IndexError(
                    f"position {resume} from error handler out of bounds"
                )
            pos = resume

        return bytes(output), length

    def decode(self, input: bytes, errors: str = "strict") -> tuple[str, int]:
        """Decode Acorn bytes to a Unicode string.

        Args:
            input: Bytes in Acorn encoding.
            errors: Accepted for interface compatibility. It is not
                consulted, because every byte value decodes to a character.

        Returns:
            Tuple of (decoded string, length of input consumed).
        """
        table = BBC_MICRO_TO_UNICODE
        # Bytes outside the table are standard ASCII or high-bit characters
        # and map to the code point of the same value.
        chars = [table[byte] if byte in table else chr(byte) for byte in input]
        return "".join(chars), len(input)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# The Acorn codec is byte-for-byte stateless, so a single shared instance
|
|
98
|
+
# backs the incremental encoder and decoder and the non-incremental
|
|
99
|
+
# encode/decode callables. The stream reader/writer subclass AcornCodec instead.
|
|
100
|
+
_SHARED_ACORN_CODEC = AcornCodec()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class AcornIncrementalEncoder(codecs.IncrementalEncoder):
    """Encode text to Acorn bytes one chunk at a time.

    Each chunk is handed to the shared codec instance with this encoder's
    ``errors`` setting. ``final`` is accepted for interface compatibility
    and is not used.
    """

    def encode(self, input: str, final: bool = False) -> bytes:
        # The codec returns (bytes, characters consumed); only the bytes
        # are part of the incremental interface.
        encoded, _consumed = _SHARED_ACORN_CODEC.encode(input, self.errors)
        return encoded
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class AcornIncrementalDecoder(codecs.IncrementalDecoder):
    """Decode Acorn bytes to text one chunk at a time.

    Each chunk is handed to the shared codec instance with this decoder's
    ``errors`` setting. ``final`` is accepted for interface compatibility
    and is not used.
    """

    def decode(self, input: bytes, final: bool = False) -> str:
        # The codec returns (text, bytes consumed); only the text is part
        # of the incremental interface.
        decoded, _consumed = _SHARED_ACORN_CODEC.decode(input, self.errors)
        return decoded
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class AcornStreamWriter(AcornCodec, codecs.StreamWriter):
    """Stream writer for Acorn encoding.

    Adds no code of its own: ``AcornCodec`` comes first in the bases, so its
    ``encode`` is the one used, combined with ``codecs.StreamWriter``.
    """
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class AcornStreamReader(AcornCodec, codecs.StreamReader):
    """Stream reader for Acorn encoding.

    Adds no code of its own: ``AcornCodec`` comes first in the bases, so its
    ``decode`` is the one used, combined with ``codecs.StreamReader``.
    """
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def getregentry(name: str | None = None) -> codecs.CodecInfo:
    """Build the :class:`codecs.CodecInfo` describing the Acorn codec.

    Args:
        name: Accepted but not used; the entry is always named ``"acorn"``.

    Returns:
        A new ``CodecInfo`` whose stateless encode/decode callables are the
        bound methods of the shared codec instance, and whose incremental
        and stream factories are the classes defined in this module.
    """
    stateless = _SHARED_ACORN_CODEC
    entry = codecs.CodecInfo(
        stateless.encode,
        stateless.decode,
        streamreader=AcornStreamReader,
        streamwriter=AcornStreamWriter,
        incrementalencoder=AcornIncrementalEncoder,
        incrementaldecoder=AcornIncrementalDecoder,
        name="acorn",
    )
    return entry
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def search_function(encoding: str) -> codecs.CodecInfo | None:
|
|
139
|
+
"""Codec registry search function recognising the ``"acorn"`` name."""
|
|
140
|
+
if encoding.lower() == "acorn":
|
|
141
|
+
return getregentry(encoding)
|
|
142
|
+
return None
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# Register the codec as an import side effect.
|
|
146
|
+
codecs.register(search_function)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def acorn_to_unicode(data: bytes) -> str:
    """Return *data* decoded with the ``"acorn"`` codec."""
    text = data.decode("acorn")
    return text
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def unicode_to_acorn(text: str) -> bytes:
    """Return *text* encoded with the ``"acorn"`` codec.

    The codec's default ``'strict'`` error handling applies.

    Raises:
        UnicodeEncodeError: If *text* contains characters that cannot be
            encoded in the Acorn character set.
    """
    data = text.encode("acorn")
    return data
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: oaknut-codecs
|
|
3
|
+
Version: 12.7.2
|
|
4
|
+
Summary: Text codecs for Acorn computer character sets (BBC Micro / Electron)
|
|
5
|
+
Author-email: Robert Smallshire <robert@smallshire.org.uk>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-codecs
|
|
8
|
+
Project-URL: Repository, https://github.com/rob-smallshire/oaknut
|
|
9
|
+
Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Dynamic: license-file
|
|
19
|
+
|
|
20
|
+
# oaknut-codecs
|
|
21
|
+
|
|
22
|
+
Text codecs for Acorn computer character sets, part of the
|
|
23
|
+
[oaknut](https://github.com/rob-smallshire/oaknut) family of packages for
|
|
24
|
+
working with Acorn computer filesystems, files, and formats.
|
|
25
|
+
|
|
26
|
+
This package provides a Python codec for the **Acorn/BBC Micro character
|
|
27
|
+
set** — a variant of ASCII in which `&60` is the pound sign `£` and `&7C`
|
|
28
|
+
is the broken bar `¦`. Importing the package registers the codec under
|
|
29
|
+
the name `"acorn"`, so it works with the standard `str.encode` /
|
|
30
|
+
`bytes.decode` machinery:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import oaknut.codecs # registers the "acorn" codec
|
|
34
|
+
|
|
35
|
+
"COST£100".encode("acorn") # b'COST\x60100'
|
|
36
|
+
b"COST\x60100".decode("acorn") # 'COST£100'
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
It is the dependency-free bottom layer of the workspace, alongside
|
|
40
|
+
`oaknut-exception`, so that language and file packages can share one
|
|
41
|
+
codec implementation without taking a dependency on each other.
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```sh
|
|
46
|
+
uv add oaknut-codecs
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Licence
|
|
50
|
+
|
|
51
|
+
MIT
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/oaknut/codecs/__init__.py
|
|
5
|
+
src/oaknut/codecs/acorn.py
|
|
6
|
+
src/oaknut_codecs.egg-info/PKG-INFO
|
|
7
|
+
src/oaknut_codecs.egg-info/SOURCES.txt
|
|
8
|
+
src/oaknut_codecs.egg-info/dependency_links.txt
|
|
9
|
+
src/oaknut_codecs.egg-info/top_level.txt
|
|
10
|
+
tests/test_acorn.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
oaknut
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""Tests for the Acorn/BBC Micro character codec."""
|
|
2
|
+
|
|
3
|
+
import oaknut.codecs # noqa: F401 - registers the "acorn" codec
|
|
4
|
+
import pytest
|
|
5
|
+
from oaknut.codecs import acorn_to_unicode, unicode_to_acorn
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TestAcornToUnicode:
    """Tests for decoding Acorn bytes to Unicode."""

    def test_standard_ascii(self):
        assert acorn_to_unicode(b"HELLO") == "HELLO"

    def test_pound_sign_bbc_micro(self):
        assert acorn_to_unicode(b"COST\x60100") == "COST£100"

    def test_broken_bar_bbc_micro(self):
        assert acorn_to_unicode(b"A\x7cB") == "A¦B"

    def test_mixed_characters(self):
        assert acorn_to_unicode(b"PRICE:\x60500") == "PRICE:£500"

    def test_empty_bytes(self):
        assert acorn_to_unicode(b"") == ""

    def test_high_bit_characters(self):
        # Bytes above 0x7F decode to the code point of the same value, so
        # check the characters themselves rather than only the length.
        assert acorn_to_unicode(bytes([0x80, 0xFF])) == "\x80\xff"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class TestUnicodeToAcorn:
    """Tests for encoding Unicode to Acorn bytes."""

    def test_standard_ascii(self):
        assert unicode_to_acorn("HELLO") == b"HELLO"

    def test_pound_sign_bbc_micro(self):
        assert unicode_to_acorn("COST£100") == b"COST\x60100"

    def test_broken_bar_bbc_micro(self):
        assert unicode_to_acorn("A¦B") == b"A\x7cB"

    def test_empty_string(self):
        assert unicode_to_acorn("") == b""

    def test_round_trip_bbc_micro(self):
        original = "FILE£NAME"
        assert acorn_to_unicode(unicode_to_acorn(original)) == original

    # unicode_to_acorn documents UnicodeEncodeError, so the failure tests
    # assert that type rather than its ValueError base class.
    def test_invalid_character_raises(self):
        with pytest.raises(UnicodeEncodeError, match="cannot be encoded"):
            unicode_to_acorn("HELLO\U0001f4be")  # floppy disk emoji

    def test_high_unicode_raises(self):
        with pytest.raises(UnicodeEncodeError, match="cannot be encoded"):
            unicode_to_acorn("TEST™")  # U+2122
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class TestCodecInterface:
    """The codec as reached through str.encode, bytes.decode and text I/O."""

    def test_encode_with_codec(self):
        encoded = "HELLO".encode("acorn")
        assert encoded == b"HELLO"

    def test_decode_with_codec(self):
        decoded = b"HELLO".decode("acorn")
        assert decoded == "HELLO"

    def test_encode_pound_sign(self):
        encoded = "£100".encode("acorn")
        assert encoded == b"\x60100"

    def test_decode_pound_sign(self):
        decoded = b"\x60100".decode("acorn")
        assert decoded == "£100"

    def test_codec_round_trip(self):
        text = "TEST£FILE"
        round_tripped = text.encode("acorn").decode("acorn")
        assert round_tripped == text

    def test_codec_name_is_case_insensitive(self):
        for spelling in ("acorn", "ACORN"):
            assert "£".encode(spelling) == b"\x60"

    def test_encode_errors_strict(self):
        with pytest.raises(UnicodeEncodeError):
            "TEST™".encode("acorn", errors="strict")

    def test_encode_errors_ignore(self):
        encoded = "TEST™OK".encode("acorn", errors="ignore")
        assert encoded == b"TESTOK"

    def test_encode_errors_replace(self):
        encoded = "TEST™".encode("acorn", errors="replace")
        assert encoded == b"TEST?"

    def test_codec_with_file_like(self):
        import io

        # Write through one text wrapper, then read the same bytes back
        # through a second wrapper over the rewound buffer.
        raw = io.BytesIO()
        text_out = io.TextIOWrapper(raw, encoding="acorn")
        text_out.write("£100")
        text_out.flush()

        raw.seek(0)
        text_in = io.TextIOWrapper(raw, encoding="acorn")
        assert text_in.read() == "£100"
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class TestCodecRegistry:
    """Codec registry integration beyond plain str.encode / bytes.decode."""

    def test_codecs_lookup_returns_info(self):
        import codecs

        info = codecs.lookup("acorn")
        assert info.name == "acorn"

    def test_incremental_encoder_streams_chunks(self):
        import codecs

        encoder = codecs.getincrementalencoder("acorn")()
        pieces = [
            encoder.encode("COST"),
            encoder.encode("£"),
            encoder.encode("100", final=True),
        ]
        assert b"".join(pieces) == b"COST\x60100"

    def test_incremental_decoder_streams_chunks(self):
        import codecs

        decoder = codecs.getincrementaldecoder("acorn")()
        pieces = [
            decoder.decode(b"COST"),
            decoder.decode(b"\x60"),
            decoder.decode(b"100", final=True),
        ]
        assert "".join(pieces) == "COST£100"

    def test_incremental_encoder_respects_errors(self):
        import codecs

        encoder = codecs.getincrementalencoder("acorn")(errors="replace")
        encoded = encoder.encode("AĀB")
        assert encoded == b"A?B"

    def test_iterencode_and_iterdecode(self):
        import codecs

        text_chunks = iter(["COST", "£", "100"])
        encoded = b"".join(codecs.iterencode(text_chunks, "acorn"))
        assert encoded == b"COST\x60100"

        byte_chunks = iter([b"COST", b"\x60", b"100"])
        decoded = "".join(codecs.iterdecode(byte_chunks, "acorn"))
        assert decoded == "COST£100"
|