oaknut-basic 12.7.0__tar.gz → 12.7.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/PKG-INFO +76 -23
  2. oaknut_basic-12.7.2/README.md +163 -0
  3. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/pyproject.toml +2 -2
  4. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/__init__.py +33 -4
  5. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/cli.py +181 -1
  6. oaknut_basic-12.7.2/src/oaknut/basic/datafile.py +472 -0
  7. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/exceptions.py +127 -0
  8. oaknut_basic-12.7.2/src/oaknut/basic/float5.py +116 -0
  9. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/PKG-INFO +76 -23
  10. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/SOURCES.txt +5 -0
  11. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/requires.txt +1 -0
  12. oaknut_basic-12.7.2/tests/test_data_cli.py +117 -0
  13. oaknut_basic-12.7.2/tests/test_datafile.py +258 -0
  14. oaknut_basic-12.7.2/tests/test_float5.py +81 -0
  15. oaknut_basic-12.7.0/README.md +0 -111
  16. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/LICENSE +0 -0
  17. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/setup.cfg +0 -0
  18. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/detokeniser.py +0 -0
  19. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/linenumber.py +0 -0
  20. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/numbering.py +0 -0
  21. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/tokeniser.py +0 -0
  22. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut/basic/tokens.py +0 -0
  23. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/dependency_links.txt +0 -0
  24. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/entry_points.txt +0 -0
  25. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/src/oaknut_basic.egg-info/top_level.txt +0 -0
  26. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_basic.py +0 -0
  27. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_cli.py +0 -0
  28. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_crunch_rules.py +0 -0
  29. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_detokeniser.py +0 -0
  30. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_keyword_coverage.py +0 -0
  31. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_linenumber.py +0 -0
  32. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_numbering.py +0 -0
  33. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_rom_golden.py +0 -0
  34. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_rom_golden_detokenise.py +0 -0
  35. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_tokeniser.py +0 -0
  36. {oaknut_basic-12.7.0 → oaknut_basic-12.7.2}/tests/test_tokens.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: oaknut-basic
3
- Version: 12.7.0
4
- Summary: BBC BASIC tokeniser and detokeniser for Acorn 8-bit and 32-bit BASIC source files
3
+ Version: 12.7.2
4
+ Summary: BBC BASIC tools: program tokeniser/de-tokeniser and PRINT#/INPUT# data-file reader/writer
5
5
  Author-email: Robert Smallshire <robert@smallshire.org.uk>
6
6
  License-Expression: MIT
7
7
  Project-URL: Homepage, https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-basic
@@ -16,6 +16,7 @@ Requires-Python: >=3.11
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: oaknut-exception>=10.0
19
+ Requires-Dist: oaknut-codecs>=12.7
19
20
  Provides-Extra: cli
20
21
  Requires-Dist: oaknut-cli>=10.0; extra == "cli"
21
22
  Dynamic: license-file
@@ -34,22 +35,35 @@ Dynamic: license-file
34
35
 
35
36
  **[Read the documentation](https://rob-smallshire.github.io/oaknut/basic/)** — getting started, the command reference, and the API.
36
37
 
37
- Convert [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) programs between
38
- their compact on-disc *tokenised* form and a plain-text listing — the two
39
- directions a real BBC Micro performs when you `LOAD` a program and `LIST` it —
40
- plus line numbering for source typed without numbers.
38
+ Work with the persistent artefacts a [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC)
39
+ program leaves behind — both the **code** and the **data**:
40
+
41
+ - **Programs.** Convert a program between its compact on-disc *tokenised* form
42
+ and a plain-text listing — the two directions a real BBC Micro performs when
43
+ you `LOAD` and `LIST` it — plus line numbering for source typed without
44
+ numbers.
45
+ - **Data files.** Read and write the channel-based files a program creates with
46
+ `OPENOUT` and writes with `PRINT#` and `BPUT#`, translating their tagged
47
+ records to and from native Python values.
41
48
 
42
49
  ## The problem
43
50
 
44
- A tokenised BBC BASIC program is bytecode, not text: keywords like `PRINT` and
45
- `GOTO` are single bytes, line numbers are packed into each line's header, and a
46
- reference such as `GOTO 100` is scrambled into a three-byte form that can never
47
- be mistaken for a line terminator. A text codec cannot read it; decoding one as
48
- text produces garbage.
51
+ Both formats are bytecode, not text, and idiosyncratic to the BBC.
52
+
53
+ A tokenised program packs keywords like `PRINT` and `GOTO` into single bytes,
54
+ folds line numbers into each line's header, and scrambles a reference such as
55
+ `GOTO 100` into a three-byte form that can never be mistaken for a line
56
+ terminator. A text codec decoding one produces garbage.
49
57
 
50
- `oaknut-basic` reproduces the **BBC BASIC II** ROM's tokeniser and de-tokeniser
51
- exactly — every token value, flag, and the line-number encoding — so a program
52
- round-trips between bytes and text **byte-for-byte**.
58
+ A `PRINT#` data file is just as surprising: `PRINT#channel, 42` writes a type
59
+ tag and the number's bytes **in reverse**, not the characters `4` `2`; strings
60
+ go out length-prefixed and backwards, and reals use the BBC's packed 5-byte
61
+ floating-point format. The file is meant to be read back only by `INPUT#`.
62
+
63
+ `oaknut-basic` reproduces the **BBC BASIC II** ROM's behaviour exactly — every
64
+ token value and flag, the line-number encoding, the record tags and the 5-byte
65
+ REAL format — so a program round-trips between bytes and text **byte-for-byte**,
66
+ and a data file round-trips through Python values **byte-for-byte**.
53
67
 
54
68
  ## Installation
55
69
 
@@ -70,21 +84,22 @@ uv add oaknut-basic # the importable library
70
84
  $ oaknut-basic --help
71
85
  Usage: oaknut-basic [OPTIONS] COMMAND [ARGS]...
72
86
 
73
- Tools for BBC BASIC source and tokenised programs.
87
+ Tools for BBC BASIC programs and data files.
74
88
 
75
89
  Options:
76
90
  --version Show the version and exit.
77
91
  --help Show this message and exit.
78
92
 
79
93
  Commands:
94
+ data Read and write BBC BASIC data files.
80
95
  detokenise De-tokenise a stored BBC BASIC program into source text.
81
96
  number Prepend ascending line numbers to an unnumbered BBC BASIC...
82
97
  tokenise Tokenise BBC BASIC source text into a stored program.
83
98
  ```
84
99
 
85
- Every command reads from a file or standard input and writes to a file or
86
- standard output, so each works file-to-file and as a pipe stage. That makes it
87
- compose with [`oaknut-disc`](https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-disc)
100
+ The program commands read from a file or standard input and write to a file or
101
+ standard output, so each works file-to-file and as a pipe stage. That makes
102
+ them compose with [`oaknut-disc`](https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-disc)
88
103
  to edit a program in place on a disc image:
89
104
 
90
105
  ```
@@ -93,13 +108,24 @@ oaknut-basic tokenise menu.bas | disc put game.ssd MENU -
93
108
  ```
94
109
 
95
110
  Tokenising and de-tokenising are exact inverses, so a program survives a
96
- there-and-back trip unchanged. `tokenise` can also number unnumbered source on
97
- the way in (`--start` / `--step`), exactly as typing it under `AUTO` would.
111
+ there-and-back trip unchanged. The `tokenise` command can also number
112
+ unnumbered source on the way in (`--start` / `--step`), exactly as typing it
113
+ under `AUTO` would.
114
+
115
+ The `data` subcommands turn a `PRINT#` data file into something host tools can
116
+ read. The `inspect` command shows its records as a table; `decode` and `encode`
117
+ are a lossless JSON round-trip pair for editing or generating a file:
118
+
119
+ ```
120
+ oaknut-basic data inspect scores.dat
121
+ oaknut-basic data decode scores.dat | jq '.[0]'
122
+ echo '[42, "HELLO", 3.5]' | oaknut-basic data encode - scores.dat
123
+ ```
98
124
 
99
125
  ## Library usage
100
126
 
101
- The library is function-shaped — `tokenise`, `detokenise`, and `number_lines`,
102
- all importable from `oaknut.basic`:
127
+ Programs are handled by the functions `tokenise`, `detokenise`, and
128
+ `number_lines`, all importable from `oaknut.basic`:
103
129
 
104
130
  ```python
105
131
  from oaknut.basic import tokenise, detokenise
@@ -113,12 +139,38 @@ When the program lives in a disc image, prefer the path-object wrappers
113
139
  `DFSPath.read_basic` / `write_basic` (and the ADFS equivalents), which compose
114
140
  the codec with the disc's character encoding and the correct load address.
115
141
 
142
+ Data files are handled by a context-managed, file-like object. The
143
+ module-level `open` mirrors the built-in one: a `mode` string selects a reader,
144
+ a writer, or a combined object, and accepts a path or a binary stream. The
145
+ polymorphic `write` picks the record type from the Python value; typed
146
+ `read_int` / `read_float` / `read_str` read it back without coercion:
147
+
148
+ ```python
149
+ from oaknut.basic import datafile
150
+
151
+ with datafile.open("scores.dat", "w") as f:
152
+ f.write("ALICE") # str -> string record
153
+ f.write(42) # int -> integer record
154
+ f.write(3.5) # float -> real record
155
+
156
+ with datafile.open("scores.dat", "r") as f:
157
+ for value in f: # yields "ALICE", 42, 3.5
158
+ print(value)
159
+ ```
160
+
161
+ Strings use the BBC `acorn` character set by default, and reals convert through
162
+ the packed 5-byte REAL format exposed as `pack_float5` / `unpack_float5`.
163
+
116
164
  ## References
117
165
 
118
166
  - [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) — Wikipedia overview of
119
167
  the language and its versions.
120
168
  - [BBC BASIC program format](https://beebwiki.mdfs.net/Program_format) —
121
169
  BeebWiki reference for the on-disc tokenised format and the token table.
170
+ - [Format of a random access file](https://beebwiki.mdfs.net/Acorn_DFS_disc_format) —
171
+ BeebWiki background on the disc filing system that holds these files; the
172
+ `PRINT#` record tags and 5-byte REAL format are documented in this package's
173
+ own API reference.
122
174
 
123
175
  ## Part of oaknut
124
176
 
@@ -126,7 +178,8 @@ the codec with the disc's character encoding and the correct load address.
126
178
  [oaknut](https://github.com/rob-smallshire/oaknut) monorepo of tools for Acorn
127
179
  computer filesystems, files, and formats. It backs the `read_basic` /
128
180
  `write_basic` methods of the `oaknut-dfs` and `oaknut-adfs` packages, and is
129
- usable on its own for `.bas` / `.bbc` files outside a disc image.
181
+ usable on its own for the `.bas` / `.bbc` programs and the data files BBC BASIC
182
+ leaves on a disc.
130
183
 
131
184
  ## License
132
185
 
@@ -0,0 +1,163 @@
1
+ <p align="center">
2
+ <img src="https://raw.githubusercontent.com/rob-smallshire/oaknut/master/docs/basic/_static/oaknut-basic-logo.png" alt="oaknut-basic" width="300">
3
+ </p>
4
+
5
+ # oaknut-basic
6
+
7
+ [![PyPI version](https://img.shields.io/pypi/v/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
8
+ [![CI](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml/badge.svg)](https://github.com/rob-smallshire/oaknut/actions/workflows/ci.yml)
9
+ [![Python versions](https://img.shields.io/pypi/pyversions/oaknut-basic)](https://pypi.org/project/oaknut-basic/)
10
+ [![License: MIT](https://img.shields.io/pypi/l/oaknut-basic)](https://github.com/rob-smallshire/oaknut/blob/master/packages/oaknut-basic/LICENSE)
11
+ [![Documentation](https://img.shields.io/badge/docs-online-blue)](https://rob-smallshire.github.io/oaknut/basic/)
12
+
13
+ **[Read the documentation](https://rob-smallshire.github.io/oaknut/basic/)** — getting started, the command reference, and the API.
14
+
15
+ Work with the persistent artefacts a [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC)
16
+ program leaves behind — both the **code** and the **data**:
17
+
18
+ - **Programs.** Convert a program between its compact on-disc *tokenised* form
19
+ and a plain-text listing — the two directions a real BBC Micro performs when
20
+ you `LOAD` and `LIST` it — plus line numbering for source typed without
21
+ numbers.
22
+ - **Data files.** Read and write the channel-based files a program creates with
23
+ `OPENOUT` and writes with `PRINT#` and `BPUT#`, translating their tagged
24
+ records to and from native Python values.
25
+
26
+ ## The problem
27
+
28
+ Both formats are bytecode, not text, and idiosyncratic to the BBC.
29
+
30
+ A tokenised program packs keywords like `PRINT` and `GOTO` into single bytes,
31
+ folds line numbers into each line's header, and scrambles a reference such as
32
+ `GOTO 100` into a three-byte form that can never be mistaken for a line
33
+ terminator. A text codec decoding one produces garbage.
34
+
35
+ A `PRINT#` data file is just as surprising: `PRINT#channel, 42` writes a type
36
+ tag and the number's bytes **in reverse**, not the characters `4` `2`; strings
37
+ go out length-prefixed and backwards, and reals use the BBC's packed 5-byte
38
+ floating-point format. The file is meant to be read back only by `INPUT#`.
39
+
40
+ `oaknut-basic` reproduces the **BBC BASIC II** ROM's behaviour exactly — every
41
+ token value and flag, the line-number encoding, the record tags and the 5-byte
42
+ REAL format — so a program round-trips between bytes and text **byte-for-byte**,
43
+ and a data file round-trips through Python values **byte-for-byte**.
44
+
45
+ ## Installation
46
+
47
+ Install with the `[cli]` extra for the `oaknut-basic` command, or bare for the
48
+ library only:
49
+
50
+ ```
51
+ uv tool install "oaknut-basic[cli]" # the command-line tool
52
+ uv add oaknut-basic # the importable library
53
+ ```
54
+
55
+ `pip` works identically with the same names. `oaknut-basic` requires Python
56
+ 3.11 or newer.
57
+
58
+ ## Command-line usage
59
+
60
+ ```
61
+ $ oaknut-basic --help
62
+ Usage: oaknut-basic [OPTIONS] COMMAND [ARGS]...
63
+
64
+ Tools for BBC BASIC programs and data files.
65
+
66
+ Options:
67
+ --version Show the version and exit.
68
+ --help Show this message and exit.
69
+
70
+ Commands:
71
+ data Read and write BBC BASIC data files.
72
+ detokenise De-tokenise a stored BBC BASIC program into source text.
73
+ number Prepend ascending line numbers to an unnumbered BBC BASIC...
74
+ tokenise Tokenise BBC BASIC source text into a stored program.
75
+ ```
76
+
77
+ The program commands read from a file or standard input and write to a file or
78
+ standard output, so each works file-to-file and as a pipe stage. That makes
79
+ them compose with [`oaknut-disc`](https://github.com/rob-smallshire/oaknut/tree/master/packages/oaknut-disc)
80
+ to edit a program in place on a disc image:
81
+
82
+ ```
83
+ disc get game.ssd MENU - | oaknut-basic detokenise > menu.bas
84
+ oaknut-basic tokenise menu.bas | disc put game.ssd MENU -
85
+ ```
86
+
87
+ Tokenising and de-tokenising are exact inverses, so a program survives a
88
+ there-and-back trip unchanged. The `tokenise` command can also number
89
+ unnumbered source on the way in (`--start` / `--step`), exactly as typing it
90
+ under `AUTO` would.
91
+
92
+ The `data` subcommands turn a `PRINT#` data file into something host tools can
93
+ read. The `inspect` command shows its records as a table; `decode` and `encode`
94
+ are a lossless JSON round-trip pair for editing or generating a file:
95
+
96
+ ```
97
+ oaknut-basic data inspect scores.dat
98
+ oaknut-basic data decode scores.dat | jq '.[0]'
99
+ echo '[42, "HELLO", 3.5]' | oaknut-basic data encode - scores.dat
100
+ ```
101
+
102
+ ## Library usage
103
+
104
+ Programs are handled by the functions `tokenise`, `detokenise`, and
105
+ `number_lines`, all importable from `oaknut.basic`:
106
+
107
+ ```python
108
+ from oaknut.basic import tokenise, detokenise
109
+
110
+ program = tokenise('10 PRINT "HELLO"\n20 GOTO 10\n') # str -> bytes
111
+ listing = detokenise(program) # bytes -> str
112
+ assert tokenise(detokenise(program)) == program # byte-exact
113
+ ```
114
+
115
+ When the program lives in a disc image, prefer the path-object wrappers
116
+ `DFSPath.read_basic` / `write_basic` (and the ADFS equivalents), which compose
117
+ the codec with the disc's character encoding and the correct load address.
118
+
119
+ Data files are handled by a context-managed, file-like object. The
120
+ module-level `open` mirrors the built-in one: a `mode` string selects a reader,
121
+ a writer, or a combined object, and accepts a path or a binary stream. The
122
+ polymorphic `write` picks the record type from the Python value; typed
123
+ `read_int` / `read_float` / `read_str` read it back without coercion:
124
+
125
+ ```python
126
+ from oaknut.basic import datafile
127
+
128
+ with datafile.open("scores.dat", "w") as f:
129
+ f.write("ALICE") # str -> string record
130
+ f.write(42) # int -> integer record
131
+ f.write(3.5) # float -> real record
132
+
133
+ with datafile.open("scores.dat", "r") as f:
134
+ for value in f: # yields "ALICE", 42, 3.5
135
+ print(value)
136
+ ```
137
+
138
+ Strings use the BBC `acorn` character set by default, and reals convert through
139
+ the packed 5-byte REAL format exposed as `pack_float5` / `unpack_float5`.
140
+
141
+ ## References
142
+
143
+ - [BBC BASIC](https://en.wikipedia.org/wiki/BBC_BASIC) — Wikipedia overview of
144
+ the language and its versions.
145
+ - [BBC BASIC program format](https://beebwiki.mdfs.net/Program_format) —
146
+ BeebWiki reference for the on-disc tokenised format and the token table.
147
+ - [Format of a random access file](https://beebwiki.mdfs.net/Acorn_DFS_disc_format) —
148
+ BeebWiki background on the disc filing system that holds these files; the
149
+ `PRINT#` record tags and 5-byte REAL format are documented in this package's
150
+ own API reference.
151
+
152
+ ## Part of oaknut
153
+
154
+ `oaknut-basic` is one package in the
155
+ [oaknut](https://github.com/rob-smallshire/oaknut) monorepo of tools for Acorn
156
+ computer filesystems, files, and formats. It backs the `read_basic` /
157
+ `write_basic` methods of the `oaknut-dfs` and `oaknut-adfs` packages, and is
158
+ usable on its own for the `.bas` / `.bbc` programs and the data files BBC BASIC
159
+ leaves on a disc.
160
+
161
+ ## License
162
+
163
+ MIT — see [LICENSE](LICENSE).
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
6
6
  name = "oaknut-basic"
7
7
  dynamic = ["version"]
8
8
  authors = [{ name = "Robert Smallshire", email = "robert@smallshire.org.uk" }]
9
- description = "BBC BASIC tokeniser and detokeniser for Acorn 8-bit and 32-bit BASIC source files"
9
+ description = "BBC BASIC tools: program tokeniser/de-tokeniser and PRINT#/INPUT# data-file reader/writer"
10
10
  readme = "README.md"
11
11
  license = "MIT"
12
12
  license-files = ["LICENSE"]
@@ -23,7 +23,7 @@ classifiers = [
23
23
  # The tokeniser and de-tokeniser raise categorised errors from the shared
24
24
  # oaknut-exception hierarchy, so the CLI boundary can render them without a
25
25
  # traceback. The data-file API also needs the `acorn` text codec from oaknut-codecs.
26
- dependencies = ["oaknut-exception>=10.0"]
26
+ dependencies = ["oaknut-exception>=10.0", "oaknut-codecs>=12.7"]
27
27
 
28
28
  # The `cli` extra pulls the shared CLI toolkit (Click, asyoulikeit, the
29
29
  # Acorn text codec and the categorised-error boundary) so this package
@@ -15,27 +15,43 @@ the bytecode). The canonical way to move a BASIC program through a
15
15
  disc image is ``DFSPath.read_basic`` / ``write_basic``, which wrap
16
16
  these functions with the correct load-address default.
17
17
 
18
- Beyond ``oaknut-exception`` — the base layer, whose categorised errors
19
- the tokeniser and de-tokeniser raise — this module has no runtime
20
- dependencies on any other oaknut package.
18
+ The tokeniser and de-tokeniser raise categorised errors from
19
+ ``oaknut-exception``; the data-file API additionally uses the ``acorn``
20
+ text codec from ``oaknut-codecs`` for string records. Both are bottom-layer
21
+ packages, so ``oaknut-basic`` stays independent of the file and disc-image
22
+ layers.
21
23
  """
22
24
 
23
25
  from __future__ import annotations
24
26
 
27
+ from oaknut.basic.datafile import (
28
+ BbcBasicDataFile,
29
+ BbcBasicDataFileBase,
30
+ BbcBasicDataReader,
31
+ BbcBasicDataWriter,
32
+ )
25
33
  from oaknut.basic.detokeniser import detokenise
26
34
  from oaknut.basic.exceptions import (
27
35
  AlreadyNumberedError,
28
36
  BASICError,
37
+ DataFileError,
38
+ DataFileTypeMismatchError,
29
39
  DetokeniseError,
40
+ Float5RangeError,
41
+ IntegerRangeError,
30
42
  InvalidLineLengthError,
31
43
  LineNumberOrderError,
32
44
  LineNumberRangeError,
33
45
  LineTooLongError,
34
46
  MissingLineMarkerError,
47
+ StringTooLongError,
35
48
  TokeniseError,
36
49
  TruncatedProgramError,
50
+ TruncatedRecordError,
51
+ UnknownTagError,
37
52
  UnnumberedLineError,
38
53
  )
54
+ from oaknut.basic.float5 import pack_float5, unpack_float5
39
55
  from oaknut.basic.numbering import (
40
56
  DEFAULT_LINE_NUMBER,
41
57
  DEFAULT_LINE_STEP,
@@ -43,7 +59,7 @@ from oaknut.basic.numbering import (
43
59
  )
44
60
  from oaknut.basic.tokeniser import tokenise
45
61
 
46
- __version__ = "12.7.0"
62
+ __version__ = "12.7.2"
47
63
 
48
64
  # Canonical load addresses for BBC BASIC programs on each host.
49
65
  # Programs saved by *SAVE on a real machine use these by default.
@@ -57,16 +73,29 @@ __all__ = [
57
73
  "ELECTRON_BASIC_LOAD_ADDRESS",
58
74
  "AlreadyNumberedError",
59
75
  "BASICError",
76
+ "BbcBasicDataFile",
77
+ "BbcBasicDataFileBase",
78
+ "BbcBasicDataReader",
79
+ "BbcBasicDataWriter",
80
+ "DataFileError",
81
+ "DataFileTypeMismatchError",
60
82
  "DetokeniseError",
83
+ "Float5RangeError",
84
+ "IntegerRangeError",
61
85
  "InvalidLineLengthError",
62
86
  "LineNumberOrderError",
63
87
  "LineNumberRangeError",
64
88
  "LineTooLongError",
65
89
  "MissingLineMarkerError",
90
+ "StringTooLongError",
66
91
  "TokeniseError",
67
92
  "TruncatedProgramError",
93
+ "TruncatedRecordError",
94
+ "UnknownTagError",
68
95
  "UnnumberedLineError",
69
96
  "detokenise",
70
97
  "number_lines",
98
+ "pack_float5",
99
+ "unpack_float5",
71
100
  "tokenise",
72
101
  ]
@@ -12,6 +12,7 @@ installed with its ``[cli]`` extra; the library core never imports it.
12
12
  from __future__ import annotations
13
13
 
14
14
  import click
15
+ from asyoulikeit.cli import report_output
15
16
 
16
17
  from . import __version__
17
18
 
@@ -44,7 +45,7 @@ class _BasicGroup(click.Group):
44
45
  help="Re-raise data and configuration errors with a full traceback.",
45
46
  )
46
47
  def cli(debug: bool) -> None: # noqa: ARG001 - read by the group error boundary
47
- """Tools for BBC BASIC source and tokenised programs."""
48
+ """Tools for BBC BASIC programs and data files."""
48
49
 
49
50
 
50
51
  def _validate_encoding(ctx: click.Context, param: click.Parameter, value: str) -> str:
@@ -275,5 +276,184 @@ def detokenise(input_stream, output_stream, encoding: str) -> None:
275
276
  output_stream.write(_listing_to_bytes(listing, encoding))
276
277
 
277
278
 
279
# Sub-group of the top-level ``cli`` group; the ``decode``, ``encode`` and
# ``inspect`` commands below attach to it via ``@data.command()``.
# The docstring is what click shows as this group's help text, so edit it
# as user-facing copy rather than as an internal comment.
@cli.group()
def data() -> None:
    """Read and write BBC BASIC data files (PRINT#/INPUT#/BPUT#/BGET#).

    These commands work with the type-tagged record files BBC BASIC
    creates with ``OPENOUT`` and writes with ``PRINT#``. ``inspect`` shows
    a file's contents as a table; ``decode`` and ``encode`` are a lossless
    JSON round-trip pair for editing and generating such files.
    """
288
+
289
+
290
+ def _kind_of(value: object) -> str:
291
+ """Return the record-type name for a value read from a data file."""
292
+ if isinstance(value, bool): # pragma: no cover - reader never yields bool
293
+ raise TypeError("unexpected bool from a data file")
294
+ if isinstance(value, int):
295
+ return "int"
296
+ if isinstance(value, float):
297
+ return "real"
298
+ return "string"
299
+
300
+
301
def _read_records(input_stream, encoding: str):
    """Generate one ``(offset, kind, value)`` triple per record in a stream.

    ``offset`` is what the reader's ``tell()`` reported immediately before
    the record was read, ``kind`` comes from ``_kind_of`` and ``value`` is
    whatever ``read()`` returned. Iteration stops at the first ``None``.
    """
    from oaknut.basic.datafile import open as open_datafile

    with open_datafile(input_stream, "r", encoding=encoding) as reader:
        # Capture the position before each read so it marks the start of
        # the record rather than its end.
        position = reader.tell()
        while (item := reader.read()) is not None:
            yield position, _kind_of(item), item
            position = reader.tell()
312
+
313
+
314
@data.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="acorn",
    show_default=True,
    callback=_validate_encoding,
    help="Text encoding of string records. Defaults to the BBC character set.",
)
def decode(input_stream, encoding: str) -> None:
    """Decode a BBC BASIC data file to a JSON array of its values.

    Reads a ``PRINT#``-tagged data file from INPUT and writes a JSON array
    to standard output, one element per record: integers and reals as JSON
    numbers, strings as JSON strings, and raw bytes as ``{"bytes": "hex"}``.
    Reals keep their full ``float`` repr (e.g. ``5.0``) so they round-trip
    back to reals rather than integers ::

        oaknut-basic data decode scores.dat | jq '.[0]'

    The output is consumed by ``oaknut-basic data encode`` to rebuild the
    file byte-for-byte.
    """
    import json

    # Only the value of each record goes into the JSON array; the offset
    # and kind are dropped.
    # NOTE(review): the help text above promises raw bytes as
    # {"bytes": "hex"}, but json.dumps is called without a ``default=`` hook,
    # so a ``bytes`` value would raise TypeError here. Confirm the reader
    # never yields bytes, or add the conversion.
    records = _read_records(input_stream, encoding)
    payload = [record[2] for record in records]
    rendered = json.dumps(payload, ensure_ascii=False, indent=2)
    click.echo(rendered)
347
+
348
+
349
@data.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.argument(
    "output_stream",
    metavar="[OUTPUT]",
    type=click.File("wb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="acorn",
    show_default=True,
    callback=_validate_encoding,
    help="Text encoding for string records. Defaults to the BBC character set.",
)
def encode(input_stream, output_stream, encoding: str) -> None:
    """Encode a JSON array of values into a BBC BASIC data file.

    Reads the JSON array produced by ``oaknut-basic data decode`` from
    INPUT and writes the tagged data file to OUTPUT. Each element becomes
    one ``PRINT#`` record: a JSON integer becomes an integer, a JSON
    number with a fractional part a real, a JSON string a string, and
    ``{"bytes": "hex"}`` raw (untagged) bytes ::

        echo '[42, "HELLO", 3.5]' | oaknut-basic data encode - scores.dat

    A hand-authored real must carry a decimal point (``3.0``, not ``3``),
    matching what ``decode`` emits.
    """
    # The docstring above is the command's help text, so the error contract
    # is documented here instead: every input problem (undecodable bytes,
    # malformed JSON, a non-array payload, a boolean, bad hex, or an
    # unsupported JSON value) is raised as ``DataError``.
    import json

    from oaknut.basic.datafile import open as open_datafile
    from oaknut.exception import DataError

    # INPUT is opened in binary mode, so json.loads decodes the bytes itself.
    # Input that is not valid UTF-8/16/32 raises UnicodeDecodeError, which is
    # not a JSONDecodeError, so both are caught and reported the same way.
    try:
        payload = json.loads(input_stream.read())
    except (json.JSONDecodeError, UnicodeDecodeError) as error:
        raise DataError(f"invalid JSON input: {error}") from error
    if not isinstance(payload, list):
        raise DataError("expected a JSON array of values")

    with open_datafile(output_stream, "w", encoding=encoding) as writer:
        for index, item in enumerate(payload):
            # bool is a subclass of int, so it must be rejected before the
            # int branch would silently write True/False as 1/0.
            if isinstance(item, bool):
                raise DataError(f"item {index}: BBC BASIC has no boolean type")
            if isinstance(item, int):
                writer.write_int(item)
            elif isinstance(item, float):
                writer.write_float(item)
            elif isinstance(item, str):
                writer.write_str(item)
            elif isinstance(item, dict) and set(item) == {"bytes"}:
                # Only the hex parsing is guarded, so that an error raised by
                # the writer itself is not misreported as invalid hex.
                try:
                    raw = bytes.fromhex(item["bytes"])
                except (ValueError, TypeError) as error:
                    raise DataError(f"item {index}: invalid hex in bytes value") from error
                writer.write_bytes(raw)
            else:
                raise DataError(f"item {index}: cannot encode JSON value {item!r}")
414
+
415
+
416
@data.command()
@click.argument(
    "input_stream",
    metavar="[INPUT]",
    type=click.File("rb"),
    default="-",
    required=False,
)
@click.option(
    "--encoding",
    default="acorn",
    show_default=True,
    callback=_validate_encoding,
    help="Text encoding of string records. Defaults to the BBC character set.",
)
@report_output(reports={"records": "The tagged records in the data file, in order."})
def inspect(input_stream, encoding: str):
    """Show the records in a BBC BASIC data file as a table.

    Reads a ``PRINT#``-tagged data file from INPUT and reports each record
    with its byte offset, type and value ::

        oaknut-basic data inspect scores.dat
        oaknut-basic data inspect scores.dat --as json

    Rendered through the shared report machinery, so ``--as display`` (the
    default at a terminal), ``--as tsv`` (the default in a pipe) and
    ``--as json`` all describe the same records; the JSON form carries the
    faithful values for scripting.
    """
    from asyoulikeit.tabular_data import Importance, Report, Reports, TableContent

    # One row per record; the byte offset is registered as a DETAIL column.
    records_table = TableContent(title="BBC BASIC data file")
    records_table.add_column("index", "#", header=True)
    records_table.add_column("type", "Type")
    records_table.add_column("value", "Value")
    records_table.add_column("offset", "Offset", importance=Importance.DETAIL)

    for index, record in enumerate(_read_records(input_stream, encoding)):
        offset, kind, value = record
        records_table.add_row(index=index, type=kind, value=value, offset=offset)

    # The "records" key matches the report name declared to @report_output.
    report = Report(data=records_table)
    return Reports(records=report)
456
+
457
+
278
458
  if __name__ == "__main__": # pragma: no cover
279
459
  cli()