PyPI - oaknut-basic - Versions diffs - 12.10.0__tar.gz → 12.11.0__tar.gz - Mend

oaknut-basic 12.10.0__tar.gz → 12.11.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{oaknut_basic-12.10.0/src/oaknut_basic.egg-info → oaknut_basic-12.11.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oaknut-basic
-Version: 12.10.0
+Version: 12.11.0
 Summary: BBC BASIC tools: program tokeniser/de-tokeniser and PRINT#/INPUT# data-file reader/writer
 Author-email: Robert Smallshire <robert@smallshire.org.uk>
 License-Expression: MIT

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/__init__.py RENAMED Viewed

@@ -31,6 +31,7 @@ from oaknut.basic.datafile import (
     BbcBasicDataWriter,
 )
 from oaknut.basic.detokeniser import detokenise, detokenise_body
+from oaknut.basic.dialect import BASIC_II, BASIC_V, Dialect
 from oaknut.basic.exceptions import (
     AlreadyNumberedError,
     BASICError,
@@ -79,7 +80,7 @@ from oaknut.basic.tokens import (
     TOKEN_TO_KEYWORD,
 )
-__version__ = "12.10.0"
+__version__ = "12.11.0"
 # Canonical load addresses for BBC BASIC programs on each host.
 # Programs saved by *SAVE on a real machine use these by default.
@@ -87,6 +88,8 @@ BBC_BASIC_LOAD_ADDRESS = 0x1900
 ELECTRON_BASIC_LOAD_ADDRESS = 0x0E00
 __all__ = [
+    "BASIC_II",
+    "BASIC_V",
     "BBC_BASIC_LOAD_ADDRESS",
     "DEFAULT_LINE_NUMBER",
     "DEFAULT_LINE_STEP",
@@ -110,6 +113,7 @@ __all__ = [
     "DataFileError",
     "DataFileTypeMismatchError",
     "DetokeniseError",
+    "Dialect",
     "Float5RangeError",
     "IntegerRangeError",
     "InvalidLineLengthError",

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/cli.py RENAMED Viewed

@@ -260,7 +260,16 @@ def tokenise(
     help='Text encoding for the OUTPUT source. Use "acorn" for the BBC '
     "character set with CR line endings (e.g. writing back to a disc image).",
 )
-def detokenise(input_stream, output_stream, encoding: str) -> None:
+@click.option(
+    "--dialect",
+    type=click.Choice(["ii", "2", "v", "5"]),
+    default="ii",
+    show_default=True,
+    help='BBC BASIC dialect, as a Roman or Arabic numeral. Use "v" (or "5") for '
+    "Archimedes / RISC OS programs, so the &C6/&C7/&C8 escape tokens "
+    "(CASE, SYS, ORIGIN, ...) decode correctly.",
+)
+def detokenise(input_stream, output_stream, encoding: str, dialect: str) -> None:
     """De-tokenise a stored BBC BASIC program into source text.
     Reads a tokenised program from INPUT and writes numbered source text
@@ -273,10 +282,17 @@ def detokenise(input_stream, output_stream, encoding: str) -> None:
     OUTPUT is written in --encoding (``utf-8`` with host-native ``LF`` line
     endings by default, for a host text file; pass ``acorn`` for the BBC
     character set with ``CR`` endings, e.g. writing back to a disc image).
+    Pass ``--dialect v`` (or ``--dialect 5``) for Archimedes / RISC OS
+    programs (BBC BASIC V), whose extended keywords use the &C6/&C7/&C8
+    two-byte escape tokens. The Roman (``ii``/``v``) and Arabic
+    (``2``/``5``) numerals are interchangeable.
     """
+    from oaknut.basic import BASIC_II, BASIC_V
     from oaknut.basic import detokenise as detokenise_program
-    listing = detokenise_program(input_stream.read())
+    selected_dialect = BASIC_V if dialect in ("v", "5") else BASIC_II
+    listing = detokenise_program(input_stream.read(), dialect=selected_dialect)
     output_stream.write(_listing_to_bytes(listing, encoding))

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/detokeniser.py RENAMED Viewed

@@ -26,15 +26,21 @@ including) the next one, so ``length = 4 + len(body)``.
 from __future__ import annotations
+from oaknut.basic.dialect import BASIC_II, Dialect
 from oaknut.basic.scanner import scan, scan_program
-def detokenise(data: bytes) -> str:
-    """De-tokenise a BBC BASIC II program into source text.
+def detokenise(data: bytes, *, dialect: Dialect = BASIC_II) -> str:
+    """De-tokenise a stored BBC BASIC program into source text.
     Args:
         data: A tokenised program, as stored on disc or in memory,
             terminated by the ``&0D &FF`` end marker.
+        dialect: The BBC BASIC variant whose token tables to use.
+            Defaults to :data:`~oaknut.basic.BASIC_II`; pass
+            :data:`~oaknut.basic.BASIC_V` for Archimedes / RISC OS
+            programs so the ``&C6``/``&C7``/``&C8`` escape tokens and the
+            re-purposed single-byte tokens decode correctly.
     Returns:
         The program as numbered source text, lines separated by ``"\\n"``
@@ -47,11 +53,11 @@ def detokenise(data: bytes) -> str:
     """
     return "".join(
         f"{record.line_number}{''.join(token.value for token in record.tokens)}\n"
-        for record in scan_program(data)
+        for record in scan_program(data, dialect=dialect)
     )
-def detokenise_body(data: bytes) -> str:
+def detokenise_body(data: bytes, *, dialect: Dialect = BASIC_II) -> str:
     """De-tokenise an unframed line body into text.
     Unlike :func:`detokenise`, this expects a line *body* — inline token
@@ -62,6 +68,9 @@ def detokenise_body(data: bytes) -> str:
     Args:
         data: A line body of inline token bytes.
+        dialect: The BBC BASIC variant whose token tables to use, passed
+            through to :func:`scan`. Defaults to
+            :data:`~oaknut.basic.BASIC_II`.
     Returns:
         The body as source text, using latin-1/code-point semantics so it
@@ -70,4 +79,4 @@ def detokenise_body(data: bytes) -> str:
     Raises:
         DetokeniseError: A ``&8D`` line-number reference is truncated.
     """
-    return "".join(token.value for token in scan(data))
+    return "".join(token.value for token in scan(data, dialect=dialect))

oaknut_basic-12.11.0/src/oaknut/basic/dialect.py ADDED Viewed

@@ -0,0 +1,164 @@
+"""BBC BASIC dialects for de-tokenisation.
+The :mod:`~oaknut.basic.tokens` table is **BBC BASIC II** — the BBC
+Micro's 8-bit language ROM. BBC BASIC V (the Archimedes / RISC OS ARM
+BASIC) keeps that table but extends it two ways, and de-tokenising a
+BASIC V program with the BASIC II table corrupts every extended keyword:
+- Three BASIC II command tokens — ``&C6``, ``&C7``, ``&C8`` (``AUTO``,
+  ``DELETE``, ``LOAD``) — become **two-byte escape prefixes**. The byte
+  that follows selects an extended keyword, with the second byte counting
+  from ``&8E`` in each table: ``&C6`` for the extended *functions*
+  (``SUM``, ``BEAT``), ``&C7`` for the extended *commands* (``APPEND`` …
+  ``RENUMBER`` … ``INSTALL``), and ``&C8`` for the extended *statements*
+  (``CASE``, ``CIRCLE``, ``ORIGIN``, ``SYS`` …).
+- Several single-byte slots are re-purposed. ``&7F`` becomes
+  ``OTHERWISE``, and ``&C9``-``&CE`` — ``LIST``/``NEW``/``OLD``/
+  ``RENUMBER``/``SAVE`` and an unused gap in BASIC II — become the block
+  keywords ``WHEN``/``OF``/``ENDCASE``/``ELSE``/``ENDIF``/``ENDWHILE``
+  (the command spellings move into the ``&C7`` escape table).
+A :class:`Dialect` bundles the single-byte token map with the escape
+tables, so the scanner and de-tokeniser can target either language with
+one parameter. :data:`BASIC_II` is the default everywhere — code that
+does not name a dialect keeps its existing BASIC II behaviour.
+The token values were cross-checked against two independent
+implementations: Justin Fletcher's de-tokeniser tables (the
+``gerph/riscos-basic-detokenise`` ``c/basic`` source) and Steve Fryatt's
+``tokenize`` (``src/parse.c``), which agree byte-for-byte.
+"""
+from __future__ import annotations
+from collections.abc import Mapping
+from dataclasses import dataclass
+from types import MappingProxyType
+from oaknut.basic.tokens import TOKEN_TO_KEYWORD
+@dataclass(frozen=True)
+class Dialect:
+    """A BBC BASIC variant's token-to-keyword mapping.
+    Attributes:
+        name: Human-readable dialect name (e.g. ``"BBC BASIC V"``).
+        single_byte: Maps a single token byte to its keyword spelling.
+            Covers the whole one-byte keyword range, including the few
+            sub-``&80`` tokens such as ``&7F`` (``OTHERWISE``).
+        escape: Maps a two-byte escape *prefix* (``&C6``/``&C7``/``&C8``
+            in BASIC V) to a table from the *following* byte to its
+            extended keyword. Empty for dialects without escape tokens.
+    """
+    name: str
+    single_byte: Mapping[int, str]
+    escape: Mapping[int, Mapping[int, str]]
+# --- BBC BASIC II -----------------------------------------------------------
+#: BBC BASIC II — the BBC Micro's 8-bit language ROM. No escape tokens;
+#: the single-byte map is the canonical :data:`TOKEN_TO_KEYWORD` table.
+BASIC_II = Dialect(name="BBC BASIC II", single_byte=TOKEN_TO_KEYWORD, escape=MappingProxyType({}))
+# --- BBC BASIC V ------------------------------------------------------------
+# The extended-token tables, keyed by the byte that follows the prefix.
+# Each runs from &8E upward, in ROM order.
+_EXTENDED_FUNCTIONS = ("SUM", "BEAT")
+_EXTENDED_COMMANDS = (
+    "APPEND",
+    "AUTO",
+    "CRUNCH",
+    "DELETE",
+    "EDIT",
+    "HELP",
+    "LIST",
+    "LOAD",
+    "LVAR",
+    "NEW",
+    "OLD",
+    "RENUMBER",
+    "SAVE",
+    "TEXTLOAD",
+    "TEXTSAVE",
+    "TWIN",
+    "TWINO",
+    "INSTALL",
+)
+_EXTENDED_STATEMENTS = (
+    "CASE",
+    "CIRCLE",
+    "FILL",
+    "ORIGIN",
+    "POINT",
+    "RECTANGLE",
+    "SWAP",
+    "WHILE",
+    "WAIT",
+    "MOUSE",
+    "QUIT",
+    "SYS",
+    "INSTALL",
+    "LIBRARY",
+    "TINT",
+    "ELLIPSE",
+    "BEATS",
+    "TEMPO",
+    "VOICES",
+    "VOICE",
+    "STEREO",
+    "OVERLAY",
+)
+_EXTENDED_TOKEN_BASE = 0x8E
+def _extended_table(keywords: tuple[str, ...]) -> Mapping[int, str]:
+    """Build a {second byte -> keyword} table counting from ``&8E``."""
+    return MappingProxyType(
+        {_EXTENDED_TOKEN_BASE + offset: keyword for offset, keyword in enumerate(keywords)}
+    )
+# Single-byte tokens BASIC V re-purposes from BASIC II. &7F is new;
+# &C9-&CE displace the LIST/NEW/OLD/RENUMBER/SAVE command tokens (whose
+# spellings move into the &C7 escape table) and the unused &CE gap.
+_BASIC_V_SINGLE_BYTE_OVERRIDES = {
+    0x7F: "OTHERWISE",
+    0xC9: "WHEN",
+    0xCA: "OF",
+    0xCB: "ENDCASE",
+    0xCC: "ELSE",
+    0xCD: "ENDIF",
+    0xCE: "ENDWHILE",
+}
+# Start from BASIC II, drop the three bytes that are now escape prefixes
+# (so a bare prefix never resolves to AUTO/DELETE/LOAD), then apply the
+# re-purposed single-byte slots.
+_basic_v_single_byte = {
+    token: keyword
+    for token, keyword in TOKEN_TO_KEYWORD.items()
+    if token not in (0xC6, 0xC7, 0xC8)
+}
+_basic_v_single_byte.update(_BASIC_V_SINGLE_BYTE_OVERRIDES)
+#: BBC BASIC V — the Archimedes / RISC OS ARM BASIC. Adds the
+#: ``&C6``/``&C7``/``&C8`` escape tables and the re-purposed single-byte
+#: tokens described in this module's docstring.
+BASIC_V = Dialect(
+    name="BBC BASIC V",
+    single_byte=MappingProxyType(_basic_v_single_byte),
+    escape=MappingProxyType(
+        {
+            0xC6: _extended_table(_EXTENDED_FUNCTIONS),
+            0xC7: _extended_table(_EXTENDED_COMMANDS),
+            0xC8: _extended_table(_EXTENDED_STATEMENTS),
+        }
+    ),
+)

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/numbering.py RENAMED Viewed

@@ -14,9 +14,35 @@ stream plumbing on top.
 from __future__ import annotations
+import re
 DEFAULT_LINE_NUMBER = 10
 DEFAULT_LINE_STEP = 10
+# A BBC BASIC line ends only at a carriage return (or a host newline);
+# Python's str.splitlines() additionally breaks on &0B, &0C, &1C-&1E and
+# others, which occur legitimately inside string literals (mode-7 / VDU
+# control codes), so split strictly on the real terminators only. This
+# definition is shared with the tokeniser, which imports it from here.
+LINE_SEPARATOR_RE = re.compile(r"\r\n|\r|\n")
+def split_source_lines(source: str) -> list[str]:
+    """Split BBC BASIC source into lines on real terminators only.
+    Matches :meth:`str.splitlines` semantics for the terminators a BBC
+    BASIC line actually ends on (``"\\n"``, ``"\\r"``, ``"\\r\\n"``) — an
+    empty *source* yields no lines and a single trailing terminator is
+    not treated as introducing a final empty line — but, unlike
+    :meth:`str.splitlines`, never breaks on the in-string control codes
+    (&0B, &0C, &1C-&1E, ...) that BBC BASIC string literals legitimately
+    contain.
+    """
+    lines = LINE_SEPARATOR_RE.split(source)
+    if lines and lines[-1] == "":
+        lines.pop()
+    return lines
 def number_lines(
     source: str,
@@ -54,7 +80,7 @@ def number_lines(
     number = start
     numbered = []
-    for line in source.splitlines():
+    for line in split_source_lines(source):
         numbered.append(f"{number} {line}")
         number += step

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/scanner.py RENAMED Viewed

@@ -26,13 +26,14 @@ import enum
 from collections.abc import Iterator
 from dataclasses import dataclass
+from oaknut.basic.dialect import BASIC_II, Dialect
 from oaknut.basic.exceptions import (
     InvalidLineLengthError,
     MissingLineMarkerError,
     TruncatedProgramError,
 )
 from oaknut.basic.linenumber import decode_line_number
-from oaknut.basic.tokens import HEADER_LENGTH, LINE_NUMBER_TOKEN, TOKEN_TO_KEYWORD
+from oaknut.basic.tokens import HEADER_LENGTH, LINE_NUMBER_TOKEN
 _CR = 0x0D
 _END_MARKER = 0xFF
@@ -90,7 +91,7 @@ class LineRecord:
     start: int
-def scan(data: bytes, *, base_offset: int = 0) -> Iterator[Token]:
+def scan(data: bytes, *, base_offset: int = 0, dialect: Dialect = BASIC_II) -> Iterator[Token]:
     """Scan an unframed line body into a stream of :class:`Token`.
     Args:
@@ -99,10 +100,17 @@ def scan(data: bytes, *, base_offset: int = 0) -> Iterator[Token]:
         base_offset: Added to every token's :attr:`~Token.start`, so
             offsets can be reported against a larger buffer (for example
             the line body's position within a whole program).
+        dialect: The BBC BASIC variant whose token tables to use.
+            Defaults to :data:`~oaknut.basic.BASIC_II`; pass
+            :data:`~oaknut.basic.BASIC_V` to resolve the
+            ``&C6``/``&C7``/``&C8`` two-byte escape tokens and the
+            re-purposed single-byte tokens of the Archimedes language.
     Yields:
         One :class:`Token` per keyword, literal run, string literal, and
-        ``&8D`` line-number reference, in source order.
+        ``&8D`` line-number reference, in source order. For a two-byte
+        escape keyword the token's :attr:`~Token.token` is the prefix
+        byte and its :attr:`~Token.start` is the prefix's offset.
     Raises:
         DetokeniseError: A ``&8D`` reference is truncated.
@@ -141,13 +149,20 @@ def scan(data: bytes, *, base_offset: int = 0) -> Iterator[Token]:
             yield Token(TokenKind.LINENUM, str(decode_line_number(payload)), base_offset + i)
             i += 4
             continue
-        if b >= 0x80 and b in TOKEN_TO_KEYWORD:
+        if b in dialect.escape and i + 1 < n and data[i + 1] in dialect.escape[b]:
             yield from flush()
-            yield Token(TokenKind.KEYWORD, TOKEN_TO_KEYWORD[b], base_offset + i, b)
+            keyword = dialect.escape[b][data[i + 1]]
+            yield Token(TokenKind.KEYWORD, keyword, base_offset + i, b)
+            i += 2
+            continue
+        if b in dialect.single_byte:
+            yield from flush()
+            yield Token(TokenKind.KEYWORD, dialect.single_byte[b], base_offset + i, b)
             i += 1
             continue
-        # A literal byte (< &80) or an unknown high byte (e.g. the &CE
-        # gap): coalesce into a TEXT run so the byte still round-trips.
+        # A literal byte, an unknown high byte, or an escape prefix whose
+        # following byte has no extended-token meaning: coalesce into a
+        # TEXT run so the byte still round-trips.
         if text_start < 0:
             text_start = i
         text_chars.append(chr(b))
@@ -155,12 +170,15 @@ def scan(data: bytes, *, base_offset: int = 0) -> Iterator[Token]:
     yield from flush()
-def scan_program(data: bytes) -> Iterator[LineRecord]:
+def scan_program(data: bytes, *, dialect: Dialect = BASIC_II) -> Iterator[LineRecord]:
     """Walk a ``&0D``-framed stored program, scanning each line body.
     Args:
         data: A tokenised program as stored on disc or in memory,
             terminated by the ``&0D &FF`` end marker.
+        dialect: The BBC BASIC variant whose token tables to use, passed
+            through to :func:`scan`. Defaults to
+            :data:`~oaknut.basic.BASIC_II`.
     Yields:
         One :class:`LineRecord` per line, in storage order, each carrying
@@ -187,6 +205,6 @@ def scan_program(data: bytes) -> Iterator[LineRecord]:
         if length < HEADER_LENGTH or i + length > n:
             raise InvalidLineLengthError(offset=i, length=length)
         body = data[i + HEADER_LENGTH : i + length]
-        tokens = tuple(scan(body, base_offset=i + HEADER_LENGTH))
+        tokens = tuple(scan(body, base_offset=i + HEADER_LENGTH, dialect=dialect))
         yield LineRecord(line_number=line_number, tokens=tokens, start=i)
         i += length

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/tokeniser.py RENAMED Viewed

@@ -27,8 +27,6 @@ the CLI does it at its I/O boundary.
 from __future__ import annotations
-import re
 from oaknut.basic.exceptions import (
     AlreadyNumberedError,
     LineNumberOrderError,
@@ -37,7 +35,12 @@ from oaknut.basic.exceptions import (
     UnnumberedLineError,
 )
 from oaknut.basic.linenumber import MAX_LINE_NUMBER, encode_line_number
-from oaknut.basic.numbering import DEFAULT_LINE_NUMBER, DEFAULT_LINE_STEP, number_lines
+from oaknut.basic.numbering import (
+    DEFAULT_LINE_NUMBER,
+    DEFAULT_LINE_STEP,
+    LINE_SEPARATOR_RE,
+    number_lines,
+)
 from oaknut.basic.tokens import (
     FLAG_CONDITIONAL,
     FLAG_FN_PROC,
@@ -56,15 +59,9 @@ from oaknut.basic.tokens import (
 _CR = 0x0D
 _END_MARKER = 0xFF
-# A BBC BASIC line ends only at a carriage return (or a host newline);
-# Python's str.splitlines() additionally breaks on &0B, &0C, &1C-&1E and
-# others, which occur legitimately inside string literals (mode-7 / VDU
-# control codes), so split strictly on the real terminators only.
-_LINE_SEPARATOR_RE = re.compile(r"\r\n|\r|\n")
 def _split_source_lines(source: str) -> list[str]:
-    return _LINE_SEPARATOR_RE.split(source)
+    return LINE_SEPARATOR_RE.split(source)
 # Keyword entries grouped by first character, preserving ROM order within
 # each group, so the crunch only scans the relevant group.

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0/src/oaknut_basic.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: oaknut-basic
-Version: 12.10.0
+Version: 12.11.0
 Summary: BBC BASIC tools: program tokeniser/de-tokeniser and PRINT#/INPUT# data-file reader/writer
 Author-email: Robert Smallshire <robert@smallshire.org.uk>
 License-Expression: MIT

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut_basic.egg-info/SOURCES.txt RENAMED Viewed

@@ -5,6 +5,7 @@ src/oaknut/basic/__init__.py
 src/oaknut/basic/cli.py
 src/oaknut/basic/datafile.py
 src/oaknut/basic/detokeniser.py
+src/oaknut/basic/dialect.py
 src/oaknut/basic/exceptions.py
 src/oaknut/basic/float5.py
 src/oaknut/basic/linenumber.py
@@ -19,6 +20,7 @@ src/oaknut_basic.egg-info/entry_points.txt
 src/oaknut_basic.egg-info/requires.txt
 src/oaknut_basic.egg-info/top_level.txt
 tests/test_basic.py
+tests/test_basic_v.py
 tests/test_cli.py
 tests/test_crunch_rules.py
 tests/test_data_cli.py

oaknut_basic-12.11.0/tests/test_basic_v.py ADDED Viewed

@@ -0,0 +1,195 @@
+"""Tests for the BBC BASIC V (Archimedes/RISC OS) detokenising dialect.
+BASIC V re-uses three BASIC II command tokens — ``&C6``/``&C7``/``&C8`` —
+as two-byte *escape prefixes*: the following byte selects an extended
+keyword (``SUM``, ``RENUMBER``, ``CASE``/``SYS``/``ORIGIN`` ...). It also
+re-purposes several single-byte slots: ``&7F`` becomes ``OTHERWISE`` and
+``&C9``-``&CE`` — ``LIST``/``NEW``/``OLD``/``RENUMBER``/``SAVE`` in
+BASIC II — become the block keywords ``WHEN``/``OF``/``ENDCASE``/
+``ELSE``/``ENDIF``/``ENDWHILE``. The default dialect stays BASIC II, so
+the escape bytes still decode to ``AUTO``/``DELETE``/``LOAD`` unless the
+caller opts into :data:`BASIC_V`.
+"""
+import pytest
+from oaknut.basic import (
+    BASIC_II,
+    BASIC_V,
+    Dialect,
+    TokenKind,
+    detokenise,
+    detokenise_body,
+    scan,
+)
+def _program(*lines: tuple[int, bytes]) -> bytes:
+    """Frame (line_number, body) pairs into a tokenised program."""
+    out = bytearray()
+    for line_number, body in lines:
+        out += bytes((0x0D, (line_number >> 8) & 0xFF, line_number & 0xFF, 4 + len(body)))
+        out += body
+    out += b"\x0d\xff"
+    return bytes(out)
+class TestEscapeStatements:
+    """``&C8`` <byte> — extended statement tokens, second byte from &8E."""
+    @pytest.mark.parametrize(
+        ("second", "keyword"),
+        [
+            (0x8E, "CASE"),
+            (0x8F, "CIRCLE"),
+            (0x90, "FILL"),
+            (0x91, "ORIGIN"),
+            (0x92, "POINT"),
+            (0x93, "RECTANGLE"),
+            (0x94, "SWAP"),
+            (0x95, "WHILE"),
+            (0x96, "WAIT"),
+            (0x97, "MOUSE"),
+            (0x98, "QUIT"),
+            (0x99, "SYS"),
+            (0x9A, "INSTALL"),
+            (0x9B, "LIBRARY"),
+            (0x9C, "TINT"),
+            (0x9D, "ELLIPSE"),
+            (0x9E, "BEATS"),
+            (0x9F, "TEMPO"),
+            (0xA0, "VOICES"),
+            (0xA1, "VOICE"),
+            (0xA2, "STEREO"),
+            (0xA3, "OVERLAY"),
+        ],
+    )
+    def test_c8_escape_decodes_to_statement(self, second: int, keyword: str):
+        assert detokenise_body(bytes((0xC8, second)), dialect=BASIC_V) == keyword
+class TestEscapeCommands:
+    """``&C7`` <byte> — extended command tokens, second byte from &8E."""
+    @pytest.mark.parametrize(
+        ("second", "keyword"),
+        [
+            (0x8E, "APPEND"),
+            (0x8F, "AUTO"),
+            (0x90, "CRUNCH"),
+            (0x91, "DELETE"),
+            (0x92, "EDIT"),
+            (0x93, "HELP"),
+            (0x94, "LIST"),
+            (0x95, "LOAD"),
+            (0x96, "LVAR"),
+            (0x97, "NEW"),
+            (0x98, "OLD"),
+            (0x99, "RENUMBER"),
+            (0x9A, "SAVE"),
+            (0x9B, "TEXTLOAD"),
+            (0x9C, "TEXTSAVE"),
+            (0x9D, "TWIN"),
+            (0x9E, "TWINO"),
+            (0x9F, "INSTALL"),
+        ],
+    )
+    def test_c7_escape_decodes_to_command(self, second: int, keyword: str):
+        assert detokenise_body(bytes((0xC7, second)), dialect=BASIC_V) == keyword
+class TestEscapeFunctions:
+    """``&C6`` <byte> — the two extended function tokens."""
+    @pytest.mark.parametrize(("second", "keyword"), [(0x8E, "SUM"), (0x8F, "BEAT")])
+    def test_c6_escape_decodes_to_function(self, second: int, keyword: str):
+        assert detokenise_body(bytes((0xC6, second)), dialect=BASIC_V) == keyword
+class TestSingleByteDifferences:
+    """Single-byte tokens BASIC V re-purposes from BASIC II."""
+    @pytest.mark.parametrize(
+        ("token", "keyword"),
+        [
+            (0x7F, "OTHERWISE"),
+            (0xC9, "WHEN"),
+            (0xCA, "OF"),
+            (0xCB, "ENDCASE"),
+            (0xCC, "ELSE"),
+            (0xCD, "ENDIF"),
+            (0xCE, "ENDWHILE"),
+        ],
+    )
+    def test_single_byte_token_decodes_to_basic_v_keyword(self, token: int, keyword: str):
+        assert detokenise_body(bytes((token,)), dialect=BASIC_V) == keyword
+    def test_shared_single_byte_tokens_are_unchanged(self):
+        # Tokens common to both dialects still decode the same way.
+        assert detokenise_body(b"\xf1", dialect=BASIC_V) == "PRINT"
+        assert detokenise_body(b"\x8f", dialect=BASIC_V) == "PTR"
+class TestIssue44Reproduction:
+    def test_origin_statement_is_not_loadtime(self):
+        # Line 140 of "Herding (RR1)", Acorn User June 1991:
+        #   C8 91 " 640,512"  ->  ORIGIN 640,512   (was: LOADTIME 640,512)
+        body = bytes((0xC8, 0x91)) + b" 640,512"
+        assert detokenise_body(body, dialect=BASIC_V) == "ORIGIN 640,512"
+    def test_program_round_through_detokenise(self):
+        program = _program((140, bytes((0xC8, 0x91)) + b" 640,512"))
+        assert detokenise(program, dialect=BASIC_V) == "140ORIGIN 640,512\n"
+class TestDefaultDialectUnchanged:
+    """Without opting in, the escape bytes keep their BASIC II meaning."""
+    def test_c8_defaults_to_load_plus_token(self):
+        body = bytes((0xC8, 0x91)) + b" 640,512"
+        assert detokenise_body(body) == "LOADTIME 640,512"
+    def test_default_dialect_is_basic_ii(self):
+        body = bytes((0xC8, 0x91))
+        assert detokenise_body(body, dialect=BASIC_II) == detokenise_body(body)
+    def test_basic_v_c9_is_list_under_basic_ii(self):
+        assert detokenise_body(b"\xc9") == "LIST"
+class TestScanStream:
+    def test_escape_token_is_one_keyword_advancing_two_bytes(self):
+        body = bytes((0xC8, 0x99)) + b"X"  # SYS then a literal X
+        tokens = list(scan(body, dialect=BASIC_V))
+        assert tokens[0].kind is TokenKind.KEYWORD
+        assert tokens[0].value == "SYS"
+        assert tokens[0].start == 0
+        assert tokens[1].kind is TokenKind.TEXT
+        assert tokens[1].value == "X"
+        assert tokens[1].start == 2
+    def test_escape_byte_inside_string_stays_literal(self):
+        body = b'"' + bytes((0xC8, 0x91)) + b'"'
+        tokens = list(scan(body, dialect=BASIC_V))
+        assert len(tokens) == 1
+        assert tokens[0].kind is TokenKind.STRING
+        assert tokens[0].value == '"\xc8\x91"'
+    def test_unterminated_escape_prefix_stays_literal(self):
+        # A trailing &C8 with no following byte must not over-read.
+        tokens = list(scan(b"\xc8", dialect=BASIC_V))
+        assert tokens[0].kind is TokenKind.TEXT
+        assert tokens[0].value == "\xc8"
+class TestDialectObject:
+    def test_basic_v_has_a_name(self):
+        assert isinstance(BASIC_V, Dialect)
+        assert "V" in BASIC_V.name
+    def test_unknown_escape_second_byte_is_left_literal(self):
+        # &C8 followed by a byte with no extended-token meaning: emit the
+        # prefix as a literal rather than inventing a keyword or over-reading.
+        body = bytes((0xC8, 0x41))  # &41 = 'A', not an extended statement
+        tokens = list(scan(body, dialect=BASIC_V))
+        assert tokens[0].kind is TokenKind.TEXT
+        assert tokens[0].value.startswith("\xc8")

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_cli.py RENAMED Viewed

@@ -9,6 +9,7 @@ rather than re-testing the numbering or tokenising logic itself.
 from pathlib import Path
 import oaknut.basic as basic
+import pytest
 from click.testing import CliRunner
 from oaknut.basic.cli import cli
@@ -230,6 +231,31 @@ class TestDetokeniseCommand:
         assert result.exit_code != 0
         assert "offset 0" in result.output
+    @pytest.mark.parametrize("dialect", ["v", "5"])
+    def test_dialect_v_decodes_escape_tokens(self, dialect: str):
+        # &C8 &91 is the BASIC V ORIGIN statement, not "LOADTIME".
+        # The Roman "v" and Arabic "5" numerals are interchangeable.
+        program = b"\x0d\x00\x8c\x0e\xc8\x91 640,512\x0d\xff"
+        runner = CliRunner()
+        result = runner.invoke(cli, ["detokenise", "--dialect", dialect], input=program)
+        assert result.exit_code == 0
+        assert result.stdout_bytes == b"140ORIGIN 640,512\n"
+    @pytest.mark.parametrize("dialect", ["ii", "2"])
+    def test_dialect_ii_keeps_basic_ii_meaning(self, dialect: str):
+        program = b"\x0d\x00\x8c\x0e\xc8\x91 640,512\x0d\xff"
+        runner = CliRunner()
+        result = runner.invoke(cli, ["detokenise", "--dialect", dialect], input=program)
+        assert result.exit_code == 0
+        assert result.stdout_bytes == b"140LOADTIME 640,512\n"
+    def test_default_dialect_is_basic_ii(self):
+        program = b"\x0d\x00\x8c\x0e\xc8\x91 640,512\x0d\xff"
+        runner = CliRunner()
+        result = runner.invoke(cli, ["detokenise"], input=program)
+        assert result.exit_code == 0
+        assert result.stdout_bytes == b"140LOADTIME 640,512\n"
 class TestTokeniseDetokeniseRoundTrip:
     def test_cli_round_trip(self):

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_numbering.py RENAMED Viewed

@@ -47,6 +47,13 @@ class TestNumberLines:
     def test_blank_lines_are_numbered_too(self):
         assert basic.number_lines("A\n\nB") == "10 A\n20 \n30 B"
+    def test_control_codes_in_string_literal_do_not_split_the_line(self):
+        # VDU / mode-7 control codes (here &0C, &1C) appear legitimately
+        # inside string literals; str.splitlines() would break on them,
+        # corrupting a single program line into several numbered lines.
+        source = 'PRINT "' + chr(0x0C) + chr(0x1C) + 'banner"'
+        assert basic.number_lines(source) == '10 PRINT "\x0c\x1cbanner"'
     def test_internal_references_are_left_untouched(self):
         # The facade only prepends numbers; it does not rewrite GOTO etc.
         source = "GOTO 20\nEND"

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/LICENSE RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/README.md RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/pyproject.toml RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/setup.cfg RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/datafile.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/exceptions.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/float5.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/linenumber.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut/basic/tokens.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut_basic.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut_basic.egg-info/entry_points.txt RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut_basic.egg-info/requires.txt RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/src/oaknut_basic.egg-info/top_level.txt RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_basic.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_crunch_rules.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_data_cli.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_datafile.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_detokeniser.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_float5.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_keyword_coverage.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_linenumber.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_rom_golden.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_rom_golden_detokenise.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_scanner.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_tokeniser.py RENAMED Viewed

File without changes

{oaknut_basic-12.10.0 → oaknut_basic-12.11.0}/tests/test_tokens.py RENAMED Viewed

File without changes

oaknut-basic 12.10.0__tar.gz → 12.11.0__tar.gz

oaknut-basic 12.10.0__tar.gz → 12.11.0__tar.gz