nockasm 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nockasm-1.0.0.dist-info/METADATA +204 -0
- nockasm-1.0.0.dist-info/RECORD +5 -0
- nockasm-1.0.0.dist-info/WHEEL +4 -0
- nockasm-1.0.0.dist-info/entry_points.txt +2 -0
- nockasm.py +623 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: nockasm
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A thin macro expander from Nock Assembly to canonical Nock 4K
|
|
5
|
+
Author-email: Sigilante <davisneale@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.8
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
# Nock Assembly
|
|
11
|
+
|
|
12
|
+

|
|
13
|
+
|
|
14
|
+
Nock Assembly is a thin macro layer over the [Nock ISA](https://nock.is), designed to make Nock more legible for teaching.
|
|
15
|
+
|
|
16
|
+
## Design
|
|
17
|
+
|
|
18
|
+
| | |
|
|
19
|
+
|---|---|
|
|
20
|
+
| Named opcodes | `(%inc .x)` instead of `[4 0 2]`. Pure lexical. |
|
|
21
|
+
| Axis schemas | `:subject {.a .b .c}` resolves `.a` `.b` `.c` to axes 2, 6, 7. Right-leaning by Hoon convention. |
|
|
22
|
+
| `#let .name = E in B`| Opcode-8 push. Tracks subject shift via `+peg(3, n)` so old names still resolve in body. |
|
|
23
|
+
| `#match E { ... }` | Scrutinee lifted once via opcode 8. Nested opcode-6 dispatch on literal patterns. Required `_ =>` default. |
|
|
24
|
+
| `; comments` | And whitespace. |
|
|
25
|
+
|
|
26
|
+
## Install / use
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from nockasm import expand
|
|
30
|
+
print(expand("(%inc (%self))"))
|
|
31
|
+
# [4 0 1]
|
|
32
|
+
|
|
33
|
+
print(expand("""
|
|
34
|
+
:subject {.tag .data}
|
|
35
|
+
#match .tag {
|
|
36
|
+
1 => (%inc .data)
|
|
37
|
+
2 => .data
|
|
38
|
+
_ => 0
|
|
39
|
+
}
|
|
40
|
+
"""))
|
|
41
|
+
# [8 [0 2] 6 [5 [1 1] 0 2] [4 0 7] 6 [5 [1 2] 0 2] [0 7] 1 0]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
End-to-end with [`pinochle`](https://github.com/sigilante/pinochle):
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from pinochle import nock, parse_noun
|
|
48
|
+
from nockasm import expand
|
|
49
|
+
|
|
50
|
+
src = """
|
|
51
|
+
:subject {.before .target .after}
|
|
52
|
+
#let .next = (%inc .target) in
|
|
53
|
+
[.before .next .after]
|
|
54
|
+
"""
|
|
55
|
+
|
|
56
|
+
formula = parse_noun(expand(src))
|
|
57
|
+
result = nock(parse_noun("[10 41 99]"), formula)
|
|
58
|
+
# result == [10 42 99]
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
At the CLI:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
python -m nockasm program.nasm # canonical flat
|
|
65
|
+
python -m nockasm --pretty program.nasm # explicit binary cells
|
|
66
|
+
echo "(%inc (%self))" | python -m nockasm
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Integration with the Nock kernel
|
|
70
|
+
|
|
71
|
+
Pinochle ships `nock-kernel` for Jupyter (`Nock 4K` kernel). It accepts
|
|
72
|
+
canonical Nock in `:formula` cells. Workflow today:
|
|
73
|
+
|
|
74
|
+
1. Write `.nasm` in a regular Python cell (or text editor).
|
|
75
|
+
2. Run `expand(src)` in a Python notebook to get canonical Nock.
|
|
76
|
+
3. Paste the result into a `:formula` cell in a Nock notebook.
|
|
77
|
+
|
|
78
|
+
A `:asm` cell magic for the Nock kernel that does this in one step is the
|
|
79
|
+
obvious next step. Roughly:
|
|
80
|
+
|
|
81
|
+
```python
|
|
82
|
+
# in pinochle/packages/nock_kernel/kernel.py
|
|
83
|
+
if cell.startswith(':asm'):
|
|
84
|
+
from nockasm import expand
|
|
85
|
+
formula_src = expand(cell[len(':asm'):])
|
|
86
|
+
# then dispatch as if user had typed ':formula <formula_src>'
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## Structural macros
|
|
90
|
+
|
|
91
|
+
### `#let .name = VALUE in BODY`
|
|
92
|
+
|
|
93
|
+
Pushes `VALUE` onto the subject via opcode 8 and binds `.name` to axis 2 in
|
|
94
|
+
`BODY`. Any axes that were already in scope are shifted rightward via
|
|
95
|
+
`+peg(3, axis)`, so the old names still resolve in the body.
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
:subject {.before .target .after}
|
|
99
|
+
#let .next = (%inc .target) in
|
|
100
|
+
[.before .next .after]
|
|
101
|
+
; -> [8 [4 0 6] [0 6] [0 2] 0 15]
|
|
102
|
+
; against [10 41 99] -> [10 42 99]
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
`VALUE` and `BODY` are both formula positions (bare atoms lift). Shadowing
|
|
106
|
+
an existing schema name is a compile error.
|
|
107
|
+
|
|
108
|
+
### `#match EXPR { PAT => BODY ... _ => DEFAULT }`
|
|
109
|
+
|
|
110
|
+
Pattern match on the value of `EXPR`. The scrutinee is evaluated once via
|
|
111
|
+
opcode 8 — i.e. lifted onto the subject — then each `PAT` is compared
|
|
112
|
+
against the lifted value via opcode 5 (eq), with opcode 6 (if) dispatching
|
|
113
|
+
to the matching `BODY`. The `_ =>` default is required.
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
:subject {.tag .data}
|
|
117
|
+
#match .tag {
|
|
118
|
+
1 => (%inc .data)
|
|
119
|
+
2 => .data
|
|
120
|
+
_ => 0
|
|
121
|
+
}
|
|
122
|
+
; -> [8 [0 2] 6 [5 [1 1] 0 2] [4 0 7] 6 [5 [1 2] 0 2] [0 7] 1 0]
|
|
123
|
+
; against [1 41] -> 42
|
|
124
|
+
; against [2 41] -> 41
|
|
125
|
+
; against [9 41] -> 0
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
`EXPR` and each `BODY` are formula positions. `PAT`s are *noun literals* —
|
|
129
|
+
they're compared against the scrutinee's runtime value, not against a
|
|
130
|
+
formula. Bare atoms in `PAT` position are not lifted: writing `1 => ...`
|
|
131
|
+
matches the atom `1`, not the formula `[1 1]`.
|
|
132
|
+
|
|
133
|
+
In the body of each arm (and the default), the scrutinee is at axis 2, and
|
|
134
|
+
the original schema axes are shifted rightward via `+peg(3, axis)` — same
|
|
135
|
+
shift rule as `#let`. That's why `.data` resolves to `[0 7]` (not `[0 3]`)
|
|
136
|
+
in the example above.
|
|
137
|
+
|
|
138
|
+
## What lifts and what doesn't
|
|
139
|
+
|
|
140
|
+
Bare atoms get lifted to `[1 atom]` in formula positions. Not in noun-literal
|
|
141
|
+
positions (`%const` arg, hint tag) or axis positions (`%slot` arg, `%call`
|
|
142
|
+
arm-axis arg, etc.). The per-opcode kinds:
|
|
143
|
+
|
|
144
|
+
| Opcode | Kinds | Notes |
|
|
145
|
+
|-----------|-------|-------|
|
|
146
|
+
| `%slot N` | a | axis literal |
|
|
147
|
+
| `%const X`| n | any noun, no lift |
|
|
148
|
+
| `%arm X` | n | synonym for `%const`; intent: callable formula |
|
|
149
|
+
| `%crash` | — | `[0 0]` — Nock crash idiom |
|
|
150
|
+
| `%self` | — | `[0 1]` — whole subject |
|
|
151
|
+
| `%battery`| — | `[0 2]` — standard core battery |
|
|
152
|
+
| `%payload`| — | `[0 3]` — standard core payload |
|
|
153
|
+
| `%sample` | — | `[0 6]` — standard gate sample |
|
|
154
|
+
| `%context`| — | `[0 7]` — standard gate context |
|
|
155
|
+
| `%eval` | ff | both formulas |
|
|
156
|
+
| `%isa` | f | |
|
|
157
|
+
| `%inc` | f | |
|
|
158
|
+
| `%eq` | ff | |
|
|
159
|
+
| `%if` | fff | branches lift |
|
|
160
|
+
| `%comp` | ff | |
|
|
161
|
+
| `%push` | ff | |
|
|
162
|
+
| `%call N F`| af | |
|
|
163
|
+
| `%edit N V F`| aff | |
|
|
164
|
+
| `%hint T F` | nf | tag is a noun literal |
|
|
165
|
+
| `%hintd T C F` | nff | clue is a formula — per 4K spec it's evaluated |
|
|
166
|
+
|
|
167
|
+
The intent-marking opcodes (`%arm`, `%crash`, and the axis aliases) all lower
|
|
168
|
+
to the same cells as their `%const` / `%slot` equivalents — they exist purely
|
|
169
|
+
to surface meaning at the source level. `%arm X` is `%const X` for cases
|
|
170
|
+
where `X` is a formula that will later be invoked via `%call`; `%self`
|
|
171
|
+
through `%context` name the standard Hoon core/gate axes.
|
|
172
|
+
|
|
173
|
+
`#let` value and body are formulas. `#match` scrutinee and arm bodies are
|
|
174
|
+
formulas. Match *patterns* are noun literals (compared against the
|
|
175
|
+
scrutinee's evaluated value).
|
|
176
|
+
|
|
177
|
+
Raw cells `[...]` are taken structurally: their elements are *not* lifted.
|
|
178
|
+
That gives you an escape hatch into raw Nock when you need it, and the
|
|
179
|
+
cons-formula distribution pattern works as expected:
|
|
180
|
+
|
|
181
|
+
```
|
|
182
|
+
:subject {.a .b}
|
|
183
|
+
[(%inc .a) (%inc .b)]
|
|
184
|
+
; -> [[4 0 2] [4 0 3]]
|
|
185
|
+
; against [3 5] -> [4 6] via Nock distribution
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
## Tests
|
|
189
|
+
|
|
190
|
+
```bash
|
|
191
|
+
python test_nockasm.py # unit tests, 55 cases
|
|
192
|
+
python test_e2e.py # end-to-end: expand -> pinochle -> verify, 19 cases
|
|
193
|
+
python test_benchmarks.py # urbit/benchmark equivalents, 5 cases (loaded from disk)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
`test_benchmarks.py` reads `benchmarks/tests.json` and `benchmarks/<name>.nasm`
|
|
197
|
+
from disk and runs each through pinochle. The five benchmarks present
|
|
198
|
+
(`dec`, `add`, `factorial`, `fibonacci`, `ackermann`) are faithful
|
|
199
|
+
transcriptions of `urbit/benchmark/desk/bar/<name>.nock` — each `.nasm`
|
|
200
|
+
expands to a noun bit-identical to the corresponding `.nock` formula.
|
|
201
|
+
|
|
202
|
+
## License
|
|
203
|
+
|
|
204
|
+
MIT.
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
nockasm.py,sha256=xB4Lf_vt54evmdwg_2752j-KloM-dwVEQWZsmm1EWfA,21063
|
|
2
|
+
nockasm-1.0.0.dist-info/METADATA,sha256=_P7PoutxeVPaVicn_4K7lmAAI9zFMosG9e1MadALDQY,6685
|
|
3
|
+
nockasm-1.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
4
|
+
nockasm-1.0.0.dist-info/entry_points.txt,sha256=N1VHhXgUPGQeMZkywbwVXpADB6hMoXk3F4HhD0RZad4,41
|
|
5
|
+
nockasm-1.0.0.dist-info/RECORD,,
|
nockasm.py
ADDED
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
"""
|
|
2
|
+
nockasm: a thin macro expander from Nock Assembly to canonical Nock 4K.
|
|
3
|
+
|
|
4
|
+
Target: emits whitespace-separated, right-associated bracketed Nock
|
|
5
|
+
parseable by pinochle.parse_noun(). Use the assembler's output as the
|
|
6
|
+
:formula in a pinochle Jupyter kernel cell.
|
|
7
|
+
|
|
8
|
+
Syntax
|
|
9
|
+
======
|
|
10
|
+
|
|
11
|
+
; comments to end-of-line
|
|
12
|
+
; declare a subject axis schema (optional; if omitted, only raw cells
|
|
13
|
+
; and %opcode forms work — no .name references):
|
|
14
|
+
|
|
15
|
+
:subject SCHEMA
|
|
16
|
+
SCHEMA := LEAF | "{" LEAF+ "}"
|
|
17
|
+
LEAF := ".name" | SCHEMA
|
|
18
|
+
|
|
19
|
+
; flat schema lists are right-leaning by Hoon convention:
|
|
20
|
+
; {.a .b} -> .a=2, .b=3
|
|
21
|
+
; {.a .b .c} -> .a=2, .b=6, .c=7
|
|
22
|
+
; {{.a .b} .c} -> .a=4, .b=5, .c=3
|
|
23
|
+
|
|
24
|
+
; named opcodes (the only macro that's just lexing):
|
|
25
|
+
(%slot N) -> [0 N]
|
|
26
|
+
(%self) -> [0 1] ; whole subject
|
|
27
|
+
(%battery) -> [0 2] ; standard core battery axis
|
|
28
|
+
(%payload) -> [0 3] ; standard core payload axis
|
|
29
|
+
(%sample) -> [0 6] ; standard gate sample axis
|
|
30
|
+
(%context) -> [0 7] ; standard gate context axis
|
|
31
|
+
(%crash) -> [0 0] ; Nock crash idiom
|
|
32
|
+
(%const X) -> [1 X]
|
|
33
|
+
(%arm X) -> [1 X] ; intent-marker for callable formula
|
|
34
|
+
(%eval S F) -> [2 S F]
|
|
35
|
+
(%isa F) -> [3 F]
|
|
36
|
+
(%inc F) -> [4 F]
|
|
37
|
+
(%eq F G) -> [5 F G]
|
|
38
|
+
(%if C T E) -> [6 C T E]
|
|
39
|
+
(%comp F G) -> [7 F G]
|
|
40
|
+
(%push F G) -> [8 F G]
|
|
41
|
+
(%call N F) -> [9 N F]
|
|
42
|
+
(%edit N V F) -> [10 [N V] F]
|
|
43
|
+
(%hint T F) -> [11 T F] ; static hint
|
|
44
|
+
(%hintd T C F) -> [11 [T C] F] ; dynamic hint
|
|
45
|
+
|
|
46
|
+
; structural macros:
|
|
47
|
+
#let .name = EXPR in EXPR
|
|
48
|
+
; pushes EXPR onto the subject via opcode 8, binds .name to
|
|
49
|
+
; axis 2 in the body, shifts existing names rightward via +peg
|
|
50
|
+
|
|
51
|
+
#match EXPR { PAT => EXPR ... _ => EXPR }
|
|
52
|
+
; evaluates EXPR once via opcode 8, then nested opcode-6
|
|
53
|
+
; dispatches against each literal PAT; default _ is required
|
|
54
|
+
|
|
55
|
+
; axis references:
|
|
56
|
+
.name -> [0 axis] ; axis from current schema
|
|
57
|
+
|
|
58
|
+
; raw cells (bare atoms inside are not lifted, but sub-EXPRs are still
|
|
59
|
+
; expanded — use this to write raw Nock and to compose):
|
|
60
|
+
[a b c ...]
|
|
61
|
+
|
|
62
|
+
; atom literals:
|
|
63
|
+
42 ; decimal
|
|
64
|
+
1.000 ; decimal with thousands separator (Hoon-style)
|
|
65
|
+
0x2a ; hex
|
|
66
|
+
0x1.0000 ; hex with separator
|
|
67
|
+
'cord' ; little-endian byte-packed natural
|
|
68
|
+
|
|
69
|
+
API
|
|
70
|
+
===
|
|
71
|
+
|
|
72
|
+
expand(src, *, pretty=False) -> str
|
|
73
|
+
|
|
74
|
+
pretty=False (default): canonical flat form, e.g. "[8 [4 0 1] 5 [0 2] 0 3]"
|
|
75
|
+
pretty=True: explicit binary cells, e.g. "[8 [[4 [0 1]] [5 [[0 2] [0 3]]]]]"
|
|
76
|
+
|
|
77
|
+
expand_to_noun(src) -> Noun
|
|
78
|
+
Returns the Python nested-tuple noun directly, for in-process use
|
|
79
|
+
(e.g., feeding pinochle without an intermediate string round-trip).
|
|
80
|
+
|
|
81
|
+
Discipline
|
|
82
|
+
==========
|
|
83
|
+
|
|
84
|
+
The canonical Nock 4K specification is the truth. This assembler is a
|
|
85
|
+
text-to-canonical-tree convenience. Where this module's behavior would
|
|
86
|
+
contradict Nock 4K, Nock 4K wins; file a bug.
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
from __future__ import annotations
|
|
90
|
+
import re
|
|
91
|
+
import sys
|
|
92
|
+
from typing import Union, Tuple, Dict, List, Optional, Any
|
|
93
|
+
|
|
94
|
+
__version__ = "1.0.0"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ----------------------------------------------------------------------
|
|
98
|
+
# Noun representation
|
|
99
|
+
# ----------------------------------------------------------------------
|
|
100
|
+
|
|
101
|
+
# A Noun is either an int (atom) or a (head, tail) tuple (cell).
|
|
102
|
+
Noun = Union[int, Tuple[Any, Any]]
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def cell(*elems) -> Noun:
    """Build a right-associated cell from two or more elements.

    ``cell(a, b, c)`` returns ``(a, (b, c))``, i.e. the tuple form of the
    Nock cell ``[a b c]``.

    Args:
        *elems: the nouns to combine, in source order.

    Returns:
        A nested 2-tuple whose right spine holds ``elems``.

    Raises:
        ValueError: if fewer than two elements are supplied.
    """
    if len(elems) < 2:
        raise ValueError("cell needs at least 2 elements")
    # Fold from the right with a loop: no recursion-depth limit and no
    # repeated tuple slicing, so very long cells stay linear-time.
    acc = elems[-1]
    for head in reversed(elems[:-1]):
        acc = (head, acc)
    return acc
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def peg(a: int, b: int) -> int:
    """Hoon's +peg: re-root axis ``b`` inside the subtree at axis ``a``.

    Defining equations::

        peg(a, 1)    = a
        peg(a, 2n)   = 2 * peg(a, n)
        peg(a, 2n+1) = 2 * peg(a, n) + 1

    Equivalently: drop the leading 1 bit of ``b`` and append the remaining
    bits to the binary form of ``a``.

    Args:
        a: axis of the subtree (must be >= 1).
        b: axis relative to that subtree (must be >= 1).

    Returns:
        The axis of the same node relative to the outer tree.

    Raises:
        ValueError: if either axis is less than 1.
    """
    if a < 1 or b < 1:
        raise ValueError("axis must be >= 1")
    # b = 1 followed by `depth` path bits; graft those bits below a.
    depth = b.bit_length() - 1
    return (a << depth) | (b - (1 << depth))
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def cord_to_nat(s: str) -> int:
    """Pack a string as a little-endian natural (a Hoon-style cord).

    The text is encoded as UTF-8 and the bytes are read least-significant
    byte first, so the first character occupies the low byte. For pure
    ASCII input this equals ``sum(ord(ch) << (8 * i))``; encoding first
    keeps multi-byte characters from overlapping their neighbours' bits.

    Args:
        s: the cord text, without the surrounding quotes.

    Returns:
        The atom for ``s``; the empty string maps to 0.
    """
    return int.from_bytes(s.encode("utf-8"), "little")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# ----------------------------------------------------------------------
|
|
139
|
+
# Tokenizer
|
|
140
|
+
# ----------------------------------------------------------------------
|
|
141
|
+
|
|
142
|
+
class Token:
    """One lexeme: its kind, its raw text, and the source line/column."""

    __slots__ = ('kind', 'value', 'line', 'col')

    def __init__(self, kind, value, line, col):
        self.kind = kind
        self.value = value
        self.line = line
        self.col = col

    def __repr__(self):
        return "Token({}, {!r}, L{}:C{})".format(
            self.kind, self.value, self.line, self.col
        )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Master lexer pattern: one named group per token kind; the name of the
# group that matched becomes the token's kind.
# Alternatives are tried left to right, so order matters — longer or more
# specific patterns come first:
#   * ARROW precedes EQUALS so "=>" is lexed as a single token;
#   * HEX precedes DEC so "0x2a" is not split into DEC "0" plus an IDENT.
# COMMENT and WS are matched here and discarded by tokenize().
_TOKEN_RE = re.compile(r'''
(?P<COMMENT> ;[^\n]* )
| (?P<WS> [ \t\n\r]+ )
| (?P<ARROW> => )
| (?P<LPAREN> \( )
| (?P<RPAREN> \) )
| (?P<LCURLY> \{ )
| (?P<RCURLY> \} )
| (?P<LBRACK> \[ )
| (?P<RBRACK> \] )
| (?P<EQUALS> = )
| (?P<UNDER> _ )
| (?P<CORD> '[^']*' )
| (?P<HEX> 0x[0-9a-fA-F][0-9a-fA-F._]* )
| (?P<DEC> [0-9][0-9_.]* )
| (?P<AXIS> \.[A-Za-z_][A-Za-z0-9_-]* )
| (?P<OPCODE> %[A-Za-z_][A-Za-z0-9_-]* )
| (?P<MACRO> \#[A-Za-z_][A-Za-z0-9_-]* )
| (?P<DIRECTIVE> :[A-Za-z_][A-Za-z0-9_-]* )
| (?P<IDENT> [A-Za-z][A-Za-z0-9_-]* )
''', re.VERBOSE)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def tokenize(src: str) -> List[Token]:
    """Split ``src`` into tokens, dropping comments and whitespace.

    Each token records the 1-based line and column at which it starts.

    Raises:
        SyntaxError: at the first character no token pattern matches.
    """
    out = []
    pos, line, col = 0, 1, 1
    end = len(src)
    while pos < end:
        hit = _TOKEN_RE.match(src, pos)
        if hit is None:
            raise SyntaxError(
                f"unexpected character {src[pos]!r} at line {line} col {col}"
            )
        lexeme = hit.group(0)
        if hit.lastgroup not in ('COMMENT', 'WS'):
            out.append(Token(hit.lastgroup, lexeme, line, col))
        # Advance the position bookkeeping past the consumed text.
        newlines = lexeme.count('\n')
        if newlines:
            line += newlines
            # Column restarts at 1 after the last newline in the lexeme.
            col = len(lexeme) - lexeme.rfind('\n')
        else:
            col += len(lexeme)
        pos = hit.end()
    return out
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
# ----------------------------------------------------------------------
|
|
202
|
+
# AST
|
|
203
|
+
# ----------------------------------------------------------------------
|
|
204
|
+
|
|
205
|
+
class Node:
    """Common base class for every AST node."""
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
class IntAtom(Node):
    """Atom written as a decimal or hex literal; ``n`` is its integer value."""

    def __init__(self, n):
        self.n = n

    def __repr__(self):
        return "IntAtom({})".format(self.n)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class CordAtom(Node):
    """Atom written as a quoted cord; ``s`` is the text between the quotes."""

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        return "CordAtom({!r})".format(self.s)
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class AxisRef(Node):
    """Reference to a named axis; ``name`` keeps its leading dot."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return "AxisRef({!r})".format(self.name)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class RawCell(Node):
    """Bracketed cell ``[a b ...]``; ``elems`` is the list of child nodes."""

    def __init__(self, elems):
        self.elems = elems

    def __repr__(self):
        return "RawCell({})".format(self.elems)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
class OpApp(Node):
    """Opcode application ``(%op arg ...)``: opcode text plus argument nodes."""

    def __init__(self, op, args):
        self.op = op
        self.args = args

    def __repr__(self):
        return "OpApp({}, {})".format(self.op, self.args)
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
class LetForm(Node):
    """``#let .name = value in body``: bound name, value node, body node."""

    def __init__(self, name, value, body):
        self.name = name
        self.value = value
        self.body = body

    def __repr__(self):
        return "LetForm({!r}, {}, {})".format(self.name, self.value, self.body)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class MatchForm(Node):
    """``#match``: scrutinee node, list of (pattern, body) pairs, default node."""

    def __init__(self, scrutinee, cases, default):
        self.scrutinee = scrutinee
        self.cases = cases
        self.default = default

    def __repr__(self):
        return "MatchForm({}, {}, {})".format(
            self.scrutinee, self.cases, self.default
        )
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
# Schema is represented as either a string (".name") or a 2-tuple
|
|
251
|
+
# (head_schema, tail_schema).
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
# ----------------------------------------------------------------------
|
|
255
|
+
# Parser
|
|
256
|
+
# ----------------------------------------------------------------------
|
|
257
|
+
|
|
258
|
+
class Parser:
    """Recursive-descent parser over a list of tokens.

    ``parse_program`` is the entry point. Each ``_parse_*`` helper consumes
    exactly one syntactic form and leaves ``self.i`` on the token after it.
    Every syntax problem is reported as ``SyntaxError``.
    """

    def __init__(self, tokens: List[Token]):
        self.tokens = tokens
        self.i = 0  # index of the next unconsumed token

    def _peek(self, k: int = 0) -> Optional[Token]:
        """Return the token ``k`` places ahead, or None past the end."""
        j = self.i + k
        if j >= len(self.tokens):
            return None
        return self.tokens[j]

    def _advance(self) -> Token:
        """Consume and return the current token (caller has peeked first)."""
        t = self.tokens[self.i]
        self.i += 1
        return t

    def _expect(self, kind: str) -> Token:
        """Consume a token of ``kind`` or raise SyntaxError."""
        t = self._peek()
        if t is None:
            raise SyntaxError(f"expected {kind}, got EOF")
        if t.kind != kind:
            raise SyntaxError(
                f"expected {kind}, got {t.kind} {t.value!r} at "
                f"L{t.line}:C{t.col}"
            )
        return self._advance()

    def parse_program(self):
        """Parse an optional ``:subject`` schema followed by one expression.

        Returns:
            ``(schema_or_None, expr)``.

        Raises:
            SyntaxError: on malformed input or tokens after the expression.
        """
        schema = None
        t = self._peek()
        if t is not None and t.kind == 'DIRECTIVE' and t.value == ':subject':
            self._advance()
            schema = self._parse_schema()
        expr = self._parse_expr()
        trailing = self._peek()
        if trailing is not None:
            raise SyntaxError(
                f"trailing tokens after expression: {trailing!r}"
            )
        return schema, expr

    def _parse_schema(self):
        """Parse ``.name`` or ``{ schema+ }``.

        A schema is returned as a string (".name") or a right-leaning
        ``(head, tail)`` 2-tuple; a one-element brace group collapses to
        its sole member.
        """
        t = self._peek()
        if t is None:
            raise SyntaxError("expected schema, got EOF")
        if t.kind == 'AXIS':
            self._advance()
            return t.value
        if t.kind == 'LCURLY':
            self._advance()
            leaves = []
            while True:
                t2 = self._peek()
                if t2 is None:
                    raise SyntaxError("unterminated schema")
                if t2.kind == 'RCURLY':
                    self._advance()
                    break
                leaves.append(self._parse_schema())
            if not leaves:
                raise SyntaxError("empty schema {}")
            # Right-leaning cons: {.a .b .c} -> (.a, (.b, .c)).
            acc = leaves[-1]
            for leaf in reversed(leaves[:-1]):
                acc = (leaf, acc)
            return acc
        raise SyntaxError(
            f"expected schema, got {t.kind} {t.value!r} at "
            f"L{t.line}:C{t.col}"
        )

    def _parse_expr(self) -> Node:
        """Parse one expression: atom, axis ref, raw cell, op app, or macro."""
        t = self._peek()
        if t is None:
            raise SyntaxError("unexpected EOF in expression")
        if t.kind == 'DEC':
            self._advance()
            # '_' and '.' are digit-group separators only.
            return IntAtom(int(t.value.replace('_', '').replace('.', '')))
        if t.kind == 'HEX':
            self._advance()
            return IntAtom(int(
                t.value[2:].replace('.', '').replace('_', ''), 16
            ))
        if t.kind == 'CORD':
            self._advance()
            return CordAtom(t.value[1:-1])  # strip the quotes
        if t.kind == 'AXIS':
            self._advance()
            return AxisRef(t.value)
        if t.kind == 'LBRACK':
            return self._parse_raw_cell()
        if t.kind == 'LPAREN':
            return self._parse_op_app()
        if t.kind == 'MACRO':
            return self._parse_macro()
        raise SyntaxError(
            f"unexpected {t.kind} {t.value!r} at L{t.line}:C{t.col}"
        )

    def _parse_raw_cell(self) -> RawCell:
        """Parse ``[ expr expr+ ]``."""
        self._expect('LBRACK')
        elems = []
        while True:
            t = self._peek()
            if t is None:
                raise SyntaxError("unterminated raw cell [")
            if t.kind == 'RBRACK':
                self._advance()
                break
            elems.append(self._parse_expr())
        if len(elems) < 2:
            raise SyntaxError("raw cell needs >=2 elements")
        return RawCell(elems)

    def _parse_op_app(self) -> OpApp:
        """Parse ``( %opcode expr* )``; arity is checked later, not here."""
        self._expect('LPAREN')
        t = self._peek()
        if t is None:
            # Report end of input explicitly rather than "got None".
            raise SyntaxError("expected %opcode after '(', got EOF")
        if t.kind != 'OPCODE':
            raise SyntaxError(
                f"expected %opcode after '(', got {t!r}"
            )
        op = self._advance().value
        args = []
        while True:
            t = self._peek()
            if t is None:
                raise SyntaxError("unterminated (")
            if t.kind == 'RPAREN':
                self._advance()
                break
            args.append(self._parse_expr())
        return OpApp(op, args)

    def _parse_macro(self) -> Node:
        """Consume a ``#macro`` token and dispatch on its name."""
        m = self._advance()
        if m.value == '#let':
            return self._parse_let()
        if m.value == '#match':
            return self._parse_match()
        raise SyntaxError(f"unknown macro {m.value}")

    def _parse_let(self) -> Node:
        """Parse the remainder of ``#let .name = VALUE in BODY``."""
        name = self._expect('AXIS').value
        self._expect('EQUALS')
        value = self._parse_expr()
        kw = self._expect('IDENT')
        if kw.value != 'in':
            raise SyntaxError(
                f"expected 'in' after #let value, got {kw.value!r}"
            )
        body = self._parse_expr()
        return LetForm(name, value, body)

    def _parse_match(self) -> Node:
        """Parse the remainder of ``#match EXPR { PAT => BODY ... _ => DEFAULT }``."""
        scrutinee = self._parse_expr()
        self._expect('LCURLY')
        cases = []
        default = None
        while True:
            t = self._peek()
            if t is None:
                raise SyntaxError("unterminated #match {")
            if t.kind == 'RCURLY':
                self._advance()
                break
            if t.kind == 'UNDER':
                self._advance()
                self._expect('ARROW')
                if default is not None:
                    raise SyntaxError("duplicate _ in #match")
                default = self._parse_expr()
            else:
                pat = self._parse_expr()
                self._expect('ARROW')
                body = self._parse_expr()
                cases.append((pat, body))
        if default is None:
            raise SyntaxError("#match requires a `_ => ...` default")
        return MatchForm(scrutinee, cases, default)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ----------------------------------------------------------------------
|
|
437
|
+
# Expander
|
|
438
|
+
# ----------------------------------------------------------------------
|
|
439
|
+
|
|
440
|
+
class Expander:
    """Lowers the AST to a Nock noun (ints and nested 2-tuples).

    Name resolution is carried in an ``axes`` dict mapping ``.name`` to its
    axis in the current subject.
    """

    # Per-opcode argument kinds, one letter per argument:
    #   'f' = formula position (bare atoms lifted to [1 atom])
    #   'n' = noun-literal position (no lift; arbitrary noun)
    #   'a' = axis position (must be atom; no lift)
    # Each entry maps an opcode name to (kinds, builder), where the builder
    # receives the list of already-compiled arguments.
    OPS = {
        # 0-arg axis aliases for the standard Hoon core layout.
        '%self': ('', lambda args: cell(0, 1)),
        '%battery': ('', lambda args: cell(0, 2)),
        '%payload': ('', lambda args: cell(0, 3)),
        '%sample': ('', lambda args: cell(0, 6)),
        '%context': ('', lambda args: cell(0, 7)),
        '%slot': ('a', lambda args: cell(0, args[0])),
        '%crash': ('', lambda args: cell(0, 0)),
        '%const': ('n', lambda args: cell(1, args[0])),
        '%arm': ('n', lambda args: cell(1, args[0])),
        '%eval': ('ff', lambda args: cell(2, args[0], args[1])),
        '%isa': ('f', lambda args: cell(3, args[0])),
        '%inc': ('f', lambda args: cell(4, args[0])),
        '%eq': ('ff', lambda args: cell(5, args[0], args[1])),
        '%if': ('fff', lambda args: cell(6, args[0], args[1], args[2])),
        '%comp': ('ff', lambda args: cell(7, args[0], args[1])),
        '%push': ('ff', lambda args: cell(8, args[0], args[1])),
        '%call': ('af', lambda args: cell(9, args[0], args[1])),
        '%edit': ('aff', lambda args: cell(10, (args[0], args[1]), args[2])),
        '%hint': ('nf', lambda args: cell(11, args[0], args[1])),
        '%hintd': ('nff', lambda args: cell(11, (args[0], args[1]), args[2])),
    }

    def expand_program(self, schema, expr) -> Noun:
        """Expand ``expr`` with names taken from ``schema`` (None = no names)."""
        axes = {} if schema is None else self._resolve_schema(schema, 1)
        return self._expand(expr, axes)

    def _resolve_schema(self, schema, base_axis: int) -> Dict[str, int]:
        """Map every name in ``schema`` to its axis, rooted at ``base_axis``.

        Raises:
            SyntaxError: if a name occurs in both halves of a pair.
        """
        if isinstance(schema, str):
            return {schema: base_axis}
        left, right = schema
        # Head lives at 2n, tail at 2n+1.
        names = dict(self._resolve_schema(left, 2 * base_axis))
        right_names = self._resolve_schema(right, 2 * base_axis + 1)
        for name, axis in right_names.items():
            if name in names:
                raise SyntaxError(f"duplicate name in schema: {name}")
            names[name] = axis
        return names

    def _shift_right(self, axes: Dict[str, int]) -> Dict[str, int]:
        """Re-root every axis under the tail: subject [new old] puts old n at peg(3, n)."""
        shifted = {}
        for name, ax in axes.items():
            shifted[name] = peg(3, ax)
        return shifted

    @staticmethod
    def _lift(n: Noun) -> Noun:
        """Wrap a bare atom as the constant formula [1 atom]; pass cells through."""
        return cell(1, n) if isinstance(n, int) else n

    def _formula(self, e: Node, axes: Dict[str, int]) -> Noun:
        """Expand ``e`` for a formula position, lifting a bare-atom result."""
        expanded = self._expand(e, axes)
        return self._lift(expanded)

    def _expand(self, e: Node, axes: Dict[str, int]) -> Noun:
        """Expand one AST node to a noun, without lifting the result.

        Raises:
            NameError: unbound ``.name`` or unknown opcode.
            TypeError: wrong argument count, non-atom axis argument, or an
                unrecognised node type.
            SyntaxError: ``#let`` re-binding a name already in scope.
        """
        if isinstance(e, IntAtom):
            return e.n
        if isinstance(e, CordAtom):
            return cord_to_nat(e.s)
        if isinstance(e, AxisRef):
            if e.name not in axes:
                raise NameError(
                    f"unbound axis {e.name}; declared: "
                    f"{sorted(axes.keys()) or '(no :subject)'}"
                )
            return cell(0, axes[e.name])
        if isinstance(e, RawCell):
            # Elements are expanded but never lifted.
            parts = []
            for child in e.elems:
                parts.append(self._expand(child, axes))
            return cell(*parts)
        if isinstance(e, OpApp):
            return self._expand_op(e, axes)
        if isinstance(e, LetForm):
            return self._expand_let(e, axes)
        if isinstance(e, MatchForm):
            return self._expand_match(e, axes)
        raise TypeError(f"unknown AST node {e!r}")

    def _expand_op(self, e, axes: Dict[str, int]) -> Noun:
        """Expand ``(%op ...)``, treating each argument per its kind letter."""
        spec = self.OPS.get(e.op)
        if spec is None:
            raise NameError(f"unknown opcode {e.op}")
        kinds, build = spec
        if len(e.args) != len(kinds):
            raise TypeError(
                f"{e.op} takes {len(kinds)} args, got {len(e.args)}"
            )
        compiled = []
        for arg, kind in zip(e.args, kinds):
            value = self._expand(arg, axes)
            if kind == 'f':
                value = self._lift(value)
            elif kind == 'a' and not isinstance(value, int):
                raise TypeError(
                    f"{e.op}: axis argument must be an atom, "
                    f"got cell {value!r}"
                )
            # kind 'n' takes any noun unchanged
            compiled.append(value)
        return build(compiled)

    def _expand_let(self, e, axes: Dict[str, int]) -> Noun:
        """Expand ``#let`` to [8 value body], binding the name at axis 2."""
        # The value is compiled against the subject as it was before the push.
        value = self._formula(e.value, axes)
        inner = self._shift_right(axes)
        if e.name in inner:
            raise SyntaxError(f"#let shadows existing name {e.name}")
        inner[e.name] = 2
        body = self._formula(e.body, inner)
        return cell(8, value, body)

    def _expand_match(self, e, axes: Dict[str, int]) -> Noun:
        """Expand ``#match`` to an opcode-8 push and a chain of opcode-6 tests."""
        # Push the scrutinee so it is evaluated once; it then sits at axis 2.
        scrutinee = self._formula(e.scrutinee, axes)
        inner = self._shift_right(axes)
        pushed = cell(0, 2)
        chain: Noun = self._formula(e.default, inner)
        # Build from the last arm backwards so the first arm is tested first.
        for pat, body in reversed(e.cases):
            # The pattern is a literal noun: not lifted, quoted with [1 ...].
            literal = self._expand(pat, inner)
            then = self._formula(body, inner)
            test = cell(5, cell(1, literal), pushed)
            chain = cell(6, test, then, chain)
        return cell(8, scrutinee, chain)
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
# ----------------------------------------------------------------------
|
|
574
|
+
# Printer
|
|
575
|
+
# ----------------------------------------------------------------------
|
|
576
|
+
|
|
577
|
+
def print_noun(n: Noun, pretty: bool = False) -> str:
    """Render a noun as bracketed, space-separated text.

    Args:
        n: an int (atom) or a 2-tuple of nouns (cell).
        pretty: if False (default) emit the canonical flat form in which a
            right spine shares one pair of brackets, e.g.
            ``[8 [4 0 1] 5 [0 2] 0 3]``; if True bracket every cell
            explicitly, e.g. ``[8 [[4 [0 1]] [5 [[0 2] [0 3]]]]]``.

    Returns:
        The textual form of ``n``.

    Raises:
        TypeError: if ``n`` (or any part of it) is neither an int nor a
            2-tuple. Without this check a stray list would recurse forever
            in flat mode and over-long tuples would be silently truncated.
    """
    if isinstance(n, int):
        return str(n)
    if not isinstance(n, tuple) or len(n) != 2:
        raise TypeError(f"not a noun: {n!r}")
    if pretty:
        return f"[{print_noun(n[0], pretty)} {print_noun(n[1], pretty)}]"
    # Canonical flat form: walk the right spine, collecting each head.
    parts = []
    while isinstance(n, tuple):
        if len(n) != 2:
            raise TypeError(f"not a noun: {n!r}")
        parts.append(print_noun(n[0], pretty))
        n = n[1]
    parts.append(print_noun(n, pretty))  # final tail: atom, or rejected
    return '[' + ' '.join(parts) + ']'
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
# ----------------------------------------------------------------------
|
|
592
|
+
# Public API
|
|
593
|
+
# ----------------------------------------------------------------------
|
|
594
|
+
|
|
595
|
+
def expand_to_noun(src: str) -> Noun:
    """Tokenize, parse and expand ``src``, returning the noun as nested tuples."""
    program = Parser(tokenize(src)).parse_program()
    # parse_program yields (schema, expr), which is expand_program's signature.
    return Expander().expand_program(*program)
|
|
599
|
+
|
|
600
|
+
|
|
601
|
+
def expand(src: str, *, pretty: bool = False) -> str:
    """Expand Nock Assembly source to Nock text.

    ``pretty=False`` gives the flat right-spine form; ``pretty=True``
    brackets every cell explicitly.
    """
    noun = expand_to_noun(src)
    return print_noun(noun, pretty=pretty)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# ----------------------------------------------------------------------
|
|
606
|
+
# CLI
|
|
607
|
+
# ----------------------------------------------------------------------
|
|
608
|
+
|
|
609
|
+
def _cli(argv=None):
    """Command-line driver: expand each named file, or stdin if none is given.

    Args:
        argv: full argument vector including the program name at index 0.
            Defaults to ``sys.argv`` so the function can also be invoked
            with no arguments, which is how console-script entry points
            call their target.
            NOTE(review): the wheel ships an entry_points.txt whose contents
            are not visible here — confirm which callable it names.

    Any argument starting with ``--`` is treated as an option, not a path;
    the only recognised option is ``--pretty``. One line of Nock is printed
    per input.
    """
    if argv is None:
        argv = sys.argv
    pretty = '--pretty' in argv
    paths = [a for a in argv[1:] if not a.startswith('--')]
    if not paths:
        print(expand(sys.stdin.read(), pretty=pretty))
        return
    for p in paths:
        # Read as UTF-8 explicitly so source files decode the same way
        # regardless of the platform's locale encoding.
        with open(p, encoding='utf-8') as f:
            src = f.read()
        print(expand(src, pretty=pretty))
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
# Run the CLI only when executed as a script (e.g. ``python -m nockasm``),
# not when the module is imported.
if __name__ == '__main__':
    _cli(sys.argv)
|