oaknut-basic 12.6.1__tar.gz → 12.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. oaknut_basic-12.7.1/PKG-INFO +133 -0
  2. oaknut_basic-12.7.1/README.md +111 -0
  3. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/pyproject.toml +16 -3
  4. oaknut_basic-12.7.1/src/oaknut/basic/__init__.py +72 -0
  5. oaknut_basic-12.7.1/src/oaknut/basic/cli.py +279 -0
  6. oaknut_basic-12.7.1/src/oaknut/basic/detokeniser.py +116 -0
  7. oaknut_basic-12.7.1/src/oaknut/basic/exceptions.py +211 -0
  8. oaknut_basic-12.7.1/src/oaknut/basic/linenumber.py +65 -0
  9. oaknut_basic-12.7.1/src/oaknut/basic/numbering.py +64 -0
  10. oaknut_basic-12.7.1/src/oaknut/basic/tokeniser.py +369 -0
  11. oaknut_basic-12.7.1/src/oaknut/basic/tokens.py +197 -0
  12. oaknut_basic-12.7.1/src/oaknut_basic.egg-info/PKG-INFO +133 -0
  13. oaknut_basic-12.7.1/src/oaknut_basic.egg-info/SOURCES.txt +28 -0
  14. oaknut_basic-12.7.1/src/oaknut_basic.egg-info/entry_points.txt +2 -0
  15. oaknut_basic-12.7.1/src/oaknut_basic.egg-info/requires.txt +4 -0
  16. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/tests/test_basic.py +0 -20
  17. oaknut_basic-12.7.1/tests/test_cli.py +202 -0
  18. oaknut_basic-12.7.1/tests/test_crunch_rules.py +73 -0
  19. oaknut_basic-12.7.1/tests/test_detokeniser.py +116 -0
  20. oaknut_basic-12.7.1/tests/test_keyword_coverage.py +98 -0
  21. oaknut_basic-12.7.1/tests/test_linenumber.py +63 -0
  22. oaknut_basic-12.7.1/tests/test_numbering.py +97 -0
  23. oaknut_basic-12.7.1/tests/test_rom_golden.py +100 -0
  24. oaknut_basic-12.7.1/tests/test_rom_golden_detokenise.py +51 -0
  25. oaknut_basic-12.7.1/tests/test_tokeniser.py +203 -0
  26. oaknut_basic-12.7.1/tests/test_tokens.py +70 -0
  27. oaknut_basic-12.6.1/PKG-INFO +0 -29
  28. oaknut_basic-12.6.1/README.md +0 -9
  29. oaknut_basic-12.6.1/src/oaknut/basic/__init__.py +0 -60
  30. oaknut_basic-12.6.1/src/oaknut_basic.egg-info/PKG-INFO +0 -29
  31. oaknut_basic-12.6.1/src/oaknut_basic.egg-info/SOURCES.txt +0 -9
  32. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/LICENSE +0 -0
  33. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/setup.cfg +0 -0
  34. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/src/oaknut_basic.egg-info/dependency_links.txt +0 -0
  35. {oaknut_basic-12.6.1 → oaknut_basic-12.7.1}/src/oaknut_basic.egg-info/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ Metadata-Version: 2.4
2
+ Name: oaknut-basic
3
+ Version: 12.7.1
4
+ Summary: BBC BASIC tokeniser and detokeniser for Acorn 8-bit and 32-bit BASIC source files
5
+ Author-email: Robert Smallshire <robert@smallshire.org.uk>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-basic
8
+ Project-URL: Repository, https://github.com/rob-smallshire/oaknut
9
+ Project-URL: Issues, https://github.com/rob-smallshire/oaknut/issues
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Python: >=3.11
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: oaknut-exception>=10.0
19
+ Provides-Extra: cli
20
+ Requires-Dist: oaknut-cli>=10.0; extra == "cli"
21
+ Dynamic: license-file
22
+
23
+ <p align="center">
24
+ <img src="https://raw.githubusercontent.com/rob-smallshire/oaknut/master/docs/basic/_static/oaknut-basic-logo.png" alt="oaknut-basic" width="300">
25
+ </p>
26
+
27
+ # oaknut-basic
28
+
29
+ [![PyPI version](https://img.shields.io/pypi/v/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
30
+ [![CI](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml/badge.svg)](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml)
31
+ [![Python versions](https://img.shields.io/pypi/pyversions/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
32
+ [![License: MIT](https://img.shields.io/pypi/l/oaknut-basic)](https://github.com/rob-smallshire/oaknut/blob/master/packages/oaknut-basic/LICENSE)
33
+ [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://rob-smallshire.github.io/oaknut/basic/)
34
+
35
+ **[Read the documentation](https://rob-smallshire.github.io/oaknut/basic/)** — getting started, the command reference, and the API.
36
+
37
+ Convert [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) programs between
38
+ their compact on-disc *tokenised* form and a plain-text listing — the two
39
+ directions a real BBC Micro performs when you `LOAD` a program and `LIST` it —
40
+ plus line numbering for source typed without numbers.
41
+
42
+ ## The problem
43
+
44
+ A tokenised BBC BASIC program is bytecode, not text: keywords like `PRINT` and
45
+ `GOTO` are single bytes, line numbers are packed into each line's header, and a
46
+ reference such as `GOTO 100` is scrambled into a three-byte form that can never
47
+ be mistaken for a line terminator. A text codec cannot read it; decoding such
48
+ a program as text produces garbage.
49
+
50
+ `oaknut-basic` reproduces the **BBC BASIC II** ROM's tokeniser and de-tokeniser
51
+ exactly — every token value, flag, and the line-number encoding — so a program
52
+ round-trips between bytes and text **byte-for-byte**.
53
+
54
+ ## Installation
55
+
56
+ Install with the `[cli]` extra for the `oaknut-basic` command, or bare for the
57
+ library only:
58
+
59
+ ```
60
+ uv tool install "oaknut-basic[cli]" # the command-line tool
61
+ uv add oaknut-basic # the importable library
62
+ ```
63
+
64
+ `pip` works identically with the same names. `oaknut-basic` requires Python
65
+ 3.11 or newer.
66
+
67
+ ## Command-line usage
68
+
69
+ ```
70
+ $ oaknut-basic --help
71
+ Usage: oaknut-basic [OPTIONS] COMMAND [ARGS]...
72
+
73
+ Tools for BBC BASIC source and tokenised programs.
74
+
75
+ Options:
76
+ --version Show the version and exit.
77
+ --help Show this message and exit.
78
+
79
+ Commands:
80
+ detokenise De-tokenise a stored BBC BASIC program into source text.
81
+ number Prepend ascending line numbers to an unnumbered BBC BASIC...
82
+ tokenise Tokenise BBC BASIC source text into a stored program.
83
+ ```
84
+
85
+ Every command reads from a file or standard input and writes to a file or
86
+ standard output, so each works file-to-file and as a pipe stage. That makes it
87
+ compose with [`oaknut-disc`](https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-disc)
88
+ to edit a program in place on a disc image:
89
+
90
+ ```
91
+ disc get game.ssd MENU - | oaknut-basic detokenise > menu.bas
92
+ oaknut-basic tokenise menu.bas | disc put game.ssd MENU -
93
+ ```
94
+
95
+ Tokenising and de-tokenising are exact inverses, so a program survives a
96
+ there-and-back trip unchanged. `tokenise` can also number unnumbered source on
97
+ the way in (`--start` / `--step`), exactly as typing it under `AUTO` would.
98
+
99
+ ## Library usage
100
+
101
+ The library is function-shaped — `tokenise`, `detokenise`, and `number_lines`,
102
+ all importable from `oaknut.basic`:
103
+
104
+ ```python
105
+ from oaknut.basic import tokenise, detokenise
106
+
107
+ program = tokenise('10 PRINT "HELLO"\n20 GOTO 10\n') # str -> bytes
108
+ listing = detokenise(program) # bytes -> str
109
+ assert tokenise(detokenise(program)) == program # byte-exact
110
+ ```
111
+
112
+ When the program lives in a disc image, prefer the path-object wrappers
113
+ `DFSPath.read_basic` / `write_basic` (and the ADFS equivalents), which compose
114
+ the codec with the disc's character encoding and the correct load address.
115
+
116
+ ## References
117
+
118
+ - [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) — Wikipedia overview of
119
+ the language and its versions.
120
+ - [BBC BASIC program format](https://beebwiki.mdfs.net/Program_format) —
121
+ BeebWiki reference for the on-disc tokenised format and the token table.
122
+
123
+ ## Part of oaknut
124
+
125
+ `oaknut-basic` is one package in the
126
+ [oaknut](https://github.com/rob-smallshire/oaknut) monorepo of tools for Acorn
127
+ computer filesystems, files, and formats. It backs the `read_basic` /
128
+ `write_basic` methods of the `oaknut-dfs` and `oaknut-adfs` packages, and is
129
+ usable on its own for `.bas` / `.bbc` files outside a disc image.
130
+
131
+ ## License
132
+
133
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,111 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/rob-smallshire/oaknut/master/docs/basic/_static/oaknut-basic-logo.png" alt="oaknut-basic" width="300">
3
+ </p>
4
+
5
+ # oaknut-basic
6
+
7
+ [![PyPI version](https://img.shields.io/pypi/v/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
8
+ [![CI](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml/badge.svg)](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml)
9
+ [![Python versions](https://img.shields.io/pypi/pyversions/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
10
+ [![License: MIT](https://img.shields.io/pypi/l/oaknut-basic)](https://github.com/rob-smallshire/oaknut/blob/master/packages/oaknut-basic/LICENSE)
11
+ [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://rob-smallshire.github.io/oaknut/basic/)
12
+
13
+ **[Read the documentation](https://rob-smallshire.github.io/oaknut/basic/)** — getting started, the command reference, and the API.
14
+
15
+ Convert [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) programs between
16
+ their compact on-disc *tokenised* form and a plain-text listing — the two
17
+ directions a real BBC Micro performs when you `LOAD` a program and `LIST` it —
18
+ plus line numbering for source typed without numbers.
19
+
20
+ ## The problem
21
+
22
+ A tokenised BBC BASIC program is bytecode, not text: keywords like `PRINT` and
23
+ `GOTO` are single bytes, line numbers are packed into each line's header, and a
24
+ reference such as `GOTO 100` is scrambled into a three-byte form that can never
25
+ be mistaken for a line terminator. A text codec cannot read it; decoding such
26
+ a program as text produces garbage.
27
+
28
+ `oaknut-basic` reproduces the **BBC BASIC II** ROM's tokeniser and de-tokeniser
29
+ exactly — every token value, flag, and the line-number encoding — so a program
30
+ round-trips between bytes and text **byte-for-byte**.
31
+
32
+ ## Installation
33
+
34
+ Install with the `[cli]` extra for the `oaknut-basic` command, or bare for the
35
+ library only:
36
+
37
+ ```
38
+ uv tool install "oaknut-basic[cli]" # the command-line tool
39
+ uv add oaknut-basic # the importable library
40
+ ```
41
+
42
+ `pip` works identically with the same names. `oaknut-basic` requires Python
43
+ 3.11 or newer.
44
+
45
+ ## Command-line usage
46
+
47
+ ```
48
+ $ oaknut-basic --help
49
+ Usage: oaknut-basic [OPTIONS] COMMAND [ARGS]...
50
+
51
+ Tools for BBC BASIC source and tokenised programs.
52
+
53
+ Options:
54
+ --version Show the version and exit.
55
+ --help Show this message and exit.
56
+
57
+ Commands:
58
+ detokenise De-tokenise a stored BBC BASIC program into source text.
59
+ number Prepend ascending line numbers to an unnumbered BBC BASIC...
60
+ tokenise Tokenise BBC BASIC source text into a stored program.
61
+ ```
62
+
63
+ Every command reads from a file or standard input and writes to a file or
64
+ standard output, so each works file-to-file and as a pipe stage. That makes it
65
+ compose with [`oaknut-disc`](https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-disc)
66
+ to edit a program in place on a disc image:
67
+
68
+ ```
69
+ disc get game.ssd MENU - | oaknut-basic detokenise > menu.bas
70
+ oaknut-basic tokenise menu.bas | disc put game.ssd MENU -
71
+ ```
72
+
73
+ Tokenising and de-tokenising are exact inverses, so a program survives a
74
+ there-and-back trip unchanged. `tokenise` can also number unnumbered source on
75
+ the way in (`--start` / `--step`), exactly as typing it under `AUTO` would.
76
+
77
+ ## Library usage
78
+
79
+ The library is function-shaped — `tokenise`, `detokenise`, and `number_lines`,
80
+ all importable from `oaknut.basic`:
81
+
82
+ ```python
83
+ from oaknut.basic import tokenise, detokenise
84
+
85
+ program = tokenise('10 PRINT "HELLO"\n20 GOTO 10\n') # str -> bytes
86
+ listing = detokenise(program) # bytes -> str
87
+ assert tokenise(detokenise(program)) == program # byte-exact
88
+ ```
89
+
90
+ When the program lives in a disc image, prefer the path-object wrappers
91
+ `DFSPath.read_basic` / `write_basic` (and the ADFS equivalents), which compose
92
+ the codec with the disc's character encoding and the correct load address.
93
+
94
+ ## References
95
+
96
+ - [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) — Wikipedia overview of
97
+ the language and its versions.
98
+ - [BBC BASIC program format](https://beebwiki.mdfs.net/Program_format) —
99
+ BeebWiki reference for the on-disc tokenised format and the token table.
100
+
101
+ ## Part of oaknut
102
+
103
+ `oaknut-basic` is one package in the
104
+ [oaknut](https://github.com/rob-smallshire/oaknut) monorepo of tools for Acorn
105
+ computer filesystems, files, and formats. It backs the `read_basic` /
106
+ `write_basic` methods of the `oaknut-dfs` and `oaknut-adfs` packages, and is
107
+ usable on its own for `.bas` / `.bbc` files outside a disc image.
108
+
109
+ ## License
110
+
111
+ MIT — see [LICENSE](LICENSE).
@@ -10,16 +10,29 @@ description = "BBC BASIC tokeniser and detokeniser for Acorn 8-bit and 32-bit BA
10
10
  readme = "README.md"
11
11
  license = "MIT"
12
12
  license-files = ["LICENSE"]
13
- requires-python = ">=3.10"
13
+ # oaknut-exception uses except* (PEP 654), so requiring it raises this
14
+ # package's Python floor to 3.11.
15
+ requires-python = ">=3.11"
14
16
  classifiers = [
15
17
  "Development Status :: 3 - Alpha",
16
18
  "Intended Audience :: Developers",
17
- "Programming Language :: Python :: 3.10",
18
19
  "Programming Language :: Python :: 3.11",
19
20
  "Programming Language :: Python :: 3.12",
20
21
  "Programming Language :: Python :: 3.13",
21
22
  ]
22
- dependencies = []
23
+ # The tokeniser and de-tokeniser raise categorised errors from the shared
24
+ # oaknut-exception hierarchy, so the CLI boundary can render them without a
25
+ # traceback. That is the package's only runtime dependency.
26
+ dependencies = ["oaknut-exception>=10.0"]
27
+
28
+ # The `cli` extra pulls the shared CLI toolkit (Click, asyoulikeit, the
29
+ # Acorn text codec and the categorised-error boundary) so this package
30
+ # can offer the standalone `oaknut-basic` command.
31
+ [project.optional-dependencies]
32
+ cli = ["oaknut-cli>=10.0"]
33
+
34
+ [project.scripts]
35
+ oaknut-basic = "oaknut.basic.cli:cli"
23
36
 
24
37
  [project.urls]
25
38
  Homepage = "https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-basic"
@@ -0,0 +1,72 @@
1
"""Tokenise and de-tokenise BBC BASIC programs.

A tokenised BBC BASIC program is a compact on-disc form: keywords such
as ``PRINT`` and ``GOTO`` become single bytes, each line's number is
packed at the start of the line, and string literals and ``REM``
comments are held in the Acorn character encoding. This package
converts between that byte form and source text.

BBC BASIC is a language, not a text encoding, so a tokenised program is
bytecode rather than text. ``tokenise`` (``str`` → ``bytes``) and
``detokenise`` (``bytes`` → ``str``) must therefore never be composed
with ``DFSPath.read_text`` / ``write_text``, which would silently mangle
the bytecode. To move a BASIC program through a disc image, use
``DFSPath.read_basic`` / ``write_basic``, which wrap these functions
with the correct load-address default.

The only other oaknut package needed at runtime is ``oaknut-exception``,
the base layer whose categorised errors the tokeniser and de-tokeniser
raise.
"""

from __future__ import annotations

from oaknut.basic.detokeniser import detokenise
from oaknut.basic.exceptions import (
    AlreadyNumberedError,
    BASICError,
    DetokeniseError,
    InvalidLineLengthError,
    LineNumberOrderError,
    LineNumberRangeError,
    LineTooLongError,
    MissingLineMarkerError,
    TokeniseError,
    TruncatedProgramError,
    UnnumberedLineError,
)
from oaknut.basic.numbering import (
    DEFAULT_LINE_NUMBER,
    DEFAULT_LINE_STEP,
    number_lines,
)
from oaknut.basic.tokeniser import tokenise

__version__ = "12.7.1"

# Canonical load addresses for BBC BASIC programs, one per host machine.
# Programs saved by *SAVE on a real machine use these by default.
BBC_BASIC_LOAD_ADDRESS = 0x1900
ELECTRON_BASIC_LOAD_ADDRESS = 0x0E00

__all__ = [
    # Constants
    "BBC_BASIC_LOAD_ADDRESS",
    "DEFAULT_LINE_NUMBER",
    "DEFAULT_LINE_STEP",
    "ELECTRON_BASIC_LOAD_ADDRESS",
    # Exceptions
    "AlreadyNumberedError",
    "BASICError",
    "DetokeniseError",
    "InvalidLineLengthError",
    "LineNumberOrderError",
    "LineNumberRangeError",
    "LineTooLongError",
    "MissingLineMarkerError",
    "TokeniseError",
    "TruncatedProgramError",
    "UnnumberedLineError",
    # Functions
    "detokenise",
    "number_lines",
    "tokenise",
]
@@ -0,0 +1,279 @@
1
+ """Click command-line interface for BBC BASIC tools.
2
+
3
+ A standalone ``oaknut-basic`` entry point in the style of the unified
4
+ ``disc`` CLI: Click for command parsing, asyoulikeit for formatted
5
+ output, and the shared :func:`oaknut.exception.handled_errors` boundary
6
+ so categorised errors surface as clean messages rather than tracebacks.
7
+
8
+ This module imports Click and is loaded only when ``oaknut-basic`` is
9
+ installed with its ``[cli]`` extra; the library core never imports it.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import click
15
+
16
+ from . import __version__
17
+
18
+
19
class _BasicGroup(click.Group):
    """Command group that runs every subcommand inside the oaknut error boundary.

    Overriding :meth:`invoke` guards all subcommands in one place, so no
    command needs a decorator of its own. Categorised
    :class:`~oaknut.exception.DataError` /
    :class:`~oaknut.exception.ConfigurationError` failures are printed
    and mapped to the matching exit code; under ``--debug`` they are
    re-raised with a full traceback for development.
    """

    def invoke(self, ctx: click.Context):
        """Invoke the group with the shared error boundary wrapped around it."""
        from oaknut.exception import handled_errors

        # ``debug`` is the group-level flag; a missing entry counts as off.
        debug_requested = ctx.params.get("debug", False)
        with handled_errors(debug=bool(debug_requested)):
            return super().invoke(ctx)
36
+
37
+
38
@click.group(cls=_BasicGroup)
@click.version_option(__version__, prog_name="oaknut-basic")
@click.option(
    "--debug",
    help="Re-raise data and configuration errors with a full traceback.",
    hidden=True,
    is_flag=True,
)
def cli(debug: bool) -> None:  # noqa: ARG001 - read by the group error boundary
    """Tools for BBC BASIC source and tokenised programs."""
    # Deliberately empty: the group only hosts the subcommands. ``debug`` is
    # not used here; _BasicGroup.invoke reads it from the context parameters.
48
+
49
+
50
def _validate_encoding(ctx: click.Context, param: click.Parameter, value: str) -> str:
    """Click option callback that accepts *value* only if it names a known codec.

    :mod:`oaknut.file` is imported before the lookup because importing it
    registers the ``"acorn"`` codec, which then resolves alongside the
    stdlib encodings. An unknown name is reported as a clean
    :class:`click.BadParameter` usage error.
    """
    import codecs

    import oaknut.file  # noqa: F401 - registers the "acorn" codec as a side effect

    try:
        codecs.lookup(value)
    except LookupError:
        # ``from None`` hides the LookupError so only the usage error shows.
        raise click.BadParameter(f"unknown encoding: {value!r}") from None
    else:
        return value
65
+
66
+
67
@cli.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.argument(
    "output_stream",
    metavar="[OUTPUT]",
    type=click.File("wb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="utf-8",
    show_default=True,
    callback=_validate_encoding,
    help='Text encoding of INPUT and OUTPUT. Use "acorn" for the BBC '
    "character set (CR line endings) when writing to a disc image.",
)
@click.option(
    "--start",
    type=click.IntRange(min=0),
    default=10,
    show_default=True,
    help="Line number given to the first line.",
)
@click.option(
    "--step",
    type=click.IntRange(min=1),
    default=10,
    show_default=True,
    help="Increment between successive line numbers.",
)
def number(input_stream, output_stream, encoding: str, start: int, step: int) -> None:
    """Prepend ascending line numbers to an unnumbered BBC BASIC program.

    Reads BASIC source text from INPUT and writes the numbered program
    to OUTPUT. Both default to ``-``: INPUT to standard input, OUTPUT to
    standard output, so the command works file-to-file ::

        oaknut-basic number menu.bas menu-numbered.bas

    and as a pipe stage between ``disc get`` and ``disc put`` ::

        disc get game.ssd MENU - | oaknut-basic number --encoding acorn | disc put game.ssd MENU -

    Text is read and written in ``--encoding`` (``utf-8`` by default; pass
    ``acorn`` for the BBC character set). Input line endings are accepted
    in any of the ``\\n`` / ``\\r`` / ``\\r\\n`` forms; output uses the
    Acorn-native ``\\r`` under the ``acorn`` encoding and ``\\n``
    otherwise.

    Line numbering mirrors the BBC's ``AUTO start,step``: internal
    references such as ``GOTO`` are left untouched, so they must already
    match the numbering requested here.
    """
    import codecs

    from oaknut.basic import number_lines
    from oaknut.file import decode_text, encode_text

    # Compare the codec's canonical name so any alias of "acorn" gets the
    # Acorn-native CR terminator; every other encoding gets LF.
    if codecs.lookup(encoding).name == "acorn":
        newline = "\r"
    else:
        newline = "\n"

    unnumbered = decode_text(input_stream.read(), encoding=encoding)
    numbered = number_lines(unnumbered, start=start, step=step)
    encoded = encode_text(numbered, encoding=encoding, newline=newline)
    output_stream.write(encoded)
136
+
137
+
138
def _source_to_code_points(data: bytes, encoding: str) -> str:
    """Map source bytes in *encoding* to a string of Acorn byte values.

    The tokeniser reads each character's code point as a raw byte, so the
    CLI first normalises its input to Acorn bytes and then exposes those
    bytes one-to-one as latin-1 code points. Input already in the
    ``acorn`` encoding is used as-is; anything else is decoded from
    *encoding* and re-encoded to the BBC character set.
    """
    import codecs

    import oaknut.file  # noqa: F401 - registers the "acorn" codec

    already_acorn = codecs.lookup(encoding).name == "acorn"
    acorn_bytes = data if already_acorn else data.decode(encoding).encode("acorn")
    # latin-1 maps bytes 0-255 to the same code points, so nothing is lost.
    return acorn_bytes.decode("latin-1")
156
+
157
+
158
def _listing_to_bytes(listing: str, encoding: str) -> bytes:
    """Turn a de-tokenised listing into output bytes in *encoding*.

    *listing* holds Acorn code points with ``\\n`` between lines. For the
    ``acorn`` encoding each ``\\n`` is swapped for the Acorn-native
    ``\\r`` and the code points are written out as bytes unchanged; for
    any other encoding the Acorn bytes are transcoded to *encoding* and
    the ``\\n`` separators are kept.
    """
    import codecs

    import oaknut.file  # noqa: F401 - registers the "acorn" codec

    if codecs.lookup(encoding).name != "acorn":
        # Recover the raw Acorn bytes, then transcode through real text.
        acorn_bytes = listing.encode("latin-1")
        return acorn_bytes.decode("acorn").encode(encoding)

    with_cr = listing.replace("\n", "\r")
    return with_cr.encode("latin-1")
173
+
174
+
175
@cli.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.argument(
    "output_stream",
    metavar="[OUTPUT]",
    type=click.File("wb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="acorn",
    show_default=True,
    callback=_validate_encoding,
    help="Text encoding of the INPUT source. Defaults to the BBC character set.",
)
@click.option(
    "--start",
    type=click.IntRange(min=0),
    default=None,
    help="Auto-number from this line (as AUTO would); INPUT must be unnumbered. "
    "Defaults to 10 when only --step is given.",
)
@click.option(
    "--step",
    type=click.IntRange(min=1),
    default=None,
    help="Auto-numbering increment. Defaults to 10 when only --start is given.",
)
def tokenise(
    input_stream, output_stream, encoding: str, start: int | None, step: int | None
) -> None:
    """Tokenise BBC BASIC source text into a stored program.

    Reads numbered BASIC source from INPUT and writes the tokenised
    program bytes to OUTPUT. Both default to ``-`` (stdin / stdout), so it
    drops in alongside ``disc put`` ::

        oaknut-basic tokenise menu.bas MENU
        cat menu.bas | oaknut-basic tokenise | disc put game.ssd MENU -

    Passing --start and/or --step auto-numbers unnumbered source, exactly
    as typing it under ``AUTO`` would; it is an error to use them on source
    that already carries line numbers ::

        oaknut-basic tokenise --start 10 unnumbered.bas MENU

    INPUT is read in --encoding (the BBC ``acorn`` character set by default;
    pass ``utf-8`` for source authored in a modern editor).
    """
    # Aliased because this command shadows the library function's name.
    from oaknut.basic import tokenise as tokenise_source

    raw_source = input_stream.read()
    code_points = _source_to_code_points(raw_source, encoding)
    # ``start`` / ``step`` stay None unless the user asked for auto-numbering.
    program = tokenise_source(code_points, start=start, step=step)
    output_stream.write(program)
235
+
236
+
237
@cli.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.argument(
    "output_stream",
    metavar="[OUTPUT]",
    type=click.File("wb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="acorn",
    show_default=True,
    callback=_validate_encoding,
    help="Text encoding for the OUTPUT source. Defaults to the BBC character set.",
)
def detokenise(input_stream, output_stream, encoding: str) -> None:
    """De-tokenise a stored BBC BASIC program into source text.

    Reads a tokenised program from INPUT and writes numbered source text
    to OUTPUT. Both default to ``-`` (stdin / stdout), so it drops in
    alongside ``disc get`` ::

        oaknut-basic detokenise MENU menu.bas
        disc get game.ssd MENU - | oaknut-basic detokenise

    OUTPUT is written in --encoding (the BBC ``acorn`` character set with CR
    line endings by default; pass ``utf-8`` for a host text file).
    """
    # Aliased because this command shadows the library function's name.
    from oaknut.basic import detokenise as detokenise_program

    program = input_stream.read()
    listing = detokenise_program(program)
    encoded = _listing_to_bytes(listing, encoding)
    output_stream.write(encoded)
276
+
277
+
278
# Run the command group when this module is executed as a script.
if __name__ == "__main__":  # pragma: no cover
    cli()