co-lambda 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
co_lambda/_codec.py ADDED
@@ -0,0 +1,147 @@
1
+ """The input-side codec register: mechanical Python-data -> lambda-term encodings, plus readouts.
2
+
3
+ This module is one of the four strictly separated kinds (codec / sugar / runtime / pure-lambda
4
+ compiler source). Every function here is a MECHANICAL encoding or decoding rule between a Python
5
+ data structure and its Scott/Church representation; the loops and non-Builder parameters in this
6
+ module are the codec's data payloads, not term-construction macros. This is the closed register the
7
+ parameter rule refers to: a Builder-producing function with a non-Builder parameter is legal iff it
8
+ lives here (or in the ``_pyast`` codec). Additions are deliberate register changes.
9
+
10
+ The output-side codec (the reflective Scott Python-AST decoder, ``to_anf_source``, and
11
+ ``_church_to_int``) lives in ``_pyast``.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from co_lambda._ast import App, Lam, Node, Var, make_app, make_var
17
+ from co_lambda._dsl import Builder, app, lam
18
+
19
+ # --- Church numerals -------------------------------------------------------------------------------
20
+
21
+
22
def church(n: int) -> Builder:
    """Build the Church numeral for ``n``: ``lambda s. lambda z. s (s ... (s z))``.

    The successor ``s`` is applied exactly ``n`` times to the zero ``z``.

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("Church numerals are nonnegative")

    def iterate(successor: Builder, zero: Builder) -> Builder:
        # Wrap ``zero`` in one more application of ``successor`` per step.
        term = zero
        for _step in range(n):
            term = app(successor, term)
        return term

    return lam(lambda s: lam(lambda z: iterate(s, z)))
34
+
35
+
36
+ # --- Scott lists and strings ------------------------------------------------------------------------
37
+ # The codec carries its own literal constructors for the representations it encodes to (a Scott list
38
+ # cell and the Scott booleans used as bits), so encoding depends on no lambda-source module.
39
+
40
# Scott cons cell: takes ``head`` and ``tail``, then hands both to the ``on_cons`` handler and
# ignores ``on_nil``.
_CODEC_SCOTT_CONS: Builder = lam(
    lambda head: lam(lambda tail: lam(lambda on_cons: lam(lambda on_nil: app(app(on_cons, head), tail))))
)
# Scott nil: ignores ``on_cons`` and returns ``on_nil``.
_CODEC_SCOTT_NIL: Builder = lam(lambda on_cons: lam(lambda on_nil: on_nil))
# Two-argument selectors: ``_CODEC_TRUE`` returns its first argument, ``_CODEC_FALSE`` its second.
# ``int_to_binnat`` uses them as the 1 and 0 bits respectively.
_CODEC_TRUE: Builder = lam(lambda a: lam(lambda b: a))
_CODEC_FALSE: Builder = lam(lambda a: lam(lambda b: b))
46
+
47
+
48
def scott_list(elements: "list[Builder]") -> Builder:
    """Fold a Python list of Builders into a Scott list, preserving element order.

    The list is built from the right: starting at nil, each element (last to first) is consed onto
    the cells accumulated so far.
    """
    cells: Builder = _CODEC_SCOTT_NIL
    for position in range(len(elements) - 1, -1, -1):
        cells = app(app(_CODEC_SCOTT_CONS, elements[position]), cells)
    return cells
54
+
55
+
56
def char_codes(text: str) -> Builder:
    """Encode a fixed Python string as the Scott list of its Church-encoded character codes."""
    numerals = []
    for character in text:
        code_point = ord(character)
        numerals.append(church(code_point))
    return scott_list(numerals)
59
+
60
+
61
+ # --- binary naturals --------------------------------------------------------------------------------
62
+
63
+
64
def int_to_binnat(value: int) -> Builder:
    """Encode a non-negative int as a BinNat: an LSB-first Scott list of Scott-boolean bits.

    Zero encodes as the empty list, because the loop emits no bits for it.

    Raises:
        ValueError: if ``value`` is negative.
    """
    if value < 0:
        raise ValueError("a BinNat is non-negative")

    bits: "list[Builder]" = []
    remaining = value
    while remaining > 0:
        # Emit the lowest bit first, then shift it away.
        if remaining & 1:
            bits.append(_CODEC_TRUE)
        else:
            bits.append(_CODEC_FALSE)
        remaining >>= 1
    return scott_list(bits)
73
+
74
+
75
def binnat_list(values: "list[int]") -> Builder:
    """Encode non-negative ints as a Scott list of BinNats (an identifier's segments)."""
    encoded = []
    for value in values:
        encoded.append(int_to_binnat(value))
    return scott_list(encoded)
78
+
79
+
80
+ # --- quoted lambda terms ----------------------------------------------------------------------------
81
+ # The quoted-source data constructors (QVar/QLam/QApp), each a literal three-handler Scott shape, and
82
+ # the ``quote`` reflection from interpreter nodes into that representation.
83
+
84
+
85
def q_var(index: Builder) -> Builder:
    """Build the quoted ``QVar``: a three-handler Scott value that passes ``index`` to ``on_var``."""

    def with_var_handler(on_var: Builder) -> Builder:
        # The remaining two handlers are accepted and ignored.
        return lam(lambda on_lam: lam(lambda on_app: app(on_var, index)))

    return lam(with_var_handler)
87
+
88
+
89
def q_lam(body: Builder) -> Builder:
    """Build the quoted ``QLam``: a three-handler Scott value that passes ``body`` to ``on_lam``."""

    def with_lam_handler(on_lam: Builder) -> Builder:
        # ``on_app`` is accepted and ignored.
        return lam(lambda on_app: app(on_lam, body))

    return lam(lambda on_var: lam(with_lam_handler))
91
+
92
+
93
def q_app(function: Builder, argument: Builder) -> Builder:
    """Build the quoted ``QApp``: passes ``function`` then ``argument`` to ``on_app``."""

    def with_app_handler(on_app: Builder) -> Builder:
        applied_to_function = app(on_app, function)
        return app(applied_to_function, argument)

    return lam(lambda on_var: lam(lambda on_lam: lam(with_app_handler)))
95
+
96
+
97
+ def quote(node: Node) -> Builder:
98
+ """Reflect an interpreter lambda ``Node`` into a quoted-lambda Scott source term.
99
+
100
+ De Bruijn indices are Church-encoded (unary). For the defunctionalization compiler which operates
101
+ on BinNat indices, use ``quote_binnat`` instead.
102
+ """
103
+ match node:
104
+ case Var(index=index):
105
+ return q_var(church(index))
106
+ case Lam(body=body):
107
+ return q_lam(quote(body))
108
+ case App(function=function, argument=argument):
109
+ return q_app(quote(function), quote(argument))
110
+ case _:
111
+ raise ValueError(f"cannot quote {node!r}")
112
+
113
+
114
+ def quote_binnat(node: Node) -> Builder:
115
+ """Like ``quote`` but encodes de Bruijn indices as BinNats (O(log n) per index).
116
+
117
+ The defunctionalization compiler (``DEFUN``) should consume BinNat-quoted terms so that index
118
+ comparisons (equality, ordering) are O(log n) instead of the O(n) of Church-encoded indices.
119
+ """
120
+ match node:
121
+ case Var(index=index):
122
+ return q_var(int_to_binnat(index))
123
+ case Lam(body=body):
124
+ return q_lam(quote_binnat(body))
125
+ case App(function=function, argument=argument):
126
+ return q_app(quote_binnat(function), quote_binnat(argument))
127
+ case _:
128
+ raise ValueError(f"cannot quote {node!r}")
129
+
130
+
131
+ # --- Church-boolean readout -------------------------------------------------------------------------
132
+
133
# Variable indices that ``interpret_boolean`` feeds to a Church boolean as its two branches; the
# readout only needs them to be distinct from each other.
# NOTE(review): presumably chosen large so they cannot collide with a bound index — confirm.
_TRUE_MARKER = 7_100_001
_FALSE_MARKER = 7_100_002
135
+
136
+
137
+ def interpret_boolean(node: Node) -> bool:
138
+ """Observe a Church boolean by selecting between two distinct free-variable markers."""
139
+ applied = make_app(make_app(node, make_var(_TRUE_MARKER)), make_var(_FALSE_MARKER))
140
+ whnf = applied.weak_head_normal_form
141
+ match whnf:
142
+ case Var(index=index) if index == _TRUE_MARKER:
143
+ return True
144
+ case Var(index=index) if index == _FALSE_MARKER:
145
+ return False
146
+ case _:
147
+ raise ValueError(f"not a Church boolean: {whnf!r}")
@@ -0,0 +1,50 @@
1
+ """Regenerate the committed defunctionalized bootstrap compiler for the running Python version.
2
+
3
+ The defunctionalization compiler ``DEFUN`` is self-compiled and committed under ``_generated`` as a
4
+ self-contained module exposing ``compiled``; the benchmark imports it to measure the compiled compiler
5
+ without triggering any in-process compilation. Running this script (or ``co-lambda-regen-compiler``)
6
+ regenerates the committed module for the running Python version.
7
+
8
+ Generation recurses as deep as the DEFUN term itself and overflows the C stack on Python 3.12+; it
9
+ therefore runs in a large-stack thread, and the resulting committed module is 3.11-only for the same
10
+ reason.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+
17
# The ``_generated`` directory next to this file; ``generate`` writes the compiler module into it.
_GENERATED_DIR = Path(__file__).resolve().parent / "_generated"
18
+
19
+
20
def module_basename() -> str:
    """Return the file stem of the committed compiler module for the running Python version."""
    from co_lambda._defun_runtime import _python_tag

    version_tag = _python_tag()
    return f"_generated_defun_compiler_{version_tag}"
25
+
26
+
27
def module_dotted_name() -> str:
    """Return the dotted import path of the committed compiler artifact."""
    stem = module_basename()
    return f"co_lambda._generated.{stem}"
30
+
31
+
32
def generate() -> Path:
    """Self-compile DEFUN and write the committed module for the running Python version.

    The compiler source is produced by ``defun_compiler_source`` run through
    ``run_in_large_stack``, then written to ``<module_basename()>.py`` under ``_generated``.

    Returns:
        The path of the module file that was written.
    """
    from co_lambda._defun_runtime import run_in_large_stack
    from co_lambda._defunctionalize import defun_compiler_source

    source = run_in_large_stack(defun_compiler_source)
    path = _GENERATED_DIR / f"{module_basename()}.py"
    # The file is Python source, which the interpreter decodes as UTF-8 by default; pin the
    # encoding instead of inheriting the platform locale's default.
    path.write_text(source, encoding="utf-8")
    return path
41
+
42
+
43
def main() -> None:
    """Regenerate the committed compiler module and report its file name and size."""
    written = generate()
    file_name = written.name
    size_in_bytes = written.stat().st_size
    print(f"wrote {file_name} ({size_in_bytes} bytes)")
47
+
48
+
49
# Script entry point: regenerate the committed compiler module when this file is run directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,321 @@
1
+ """The defunctionalization compiler, written in the pure lambda calculus.
2
+
3
+ The source is a quoted lambda term (Scott values over ``QVar i`` / ``QLam body`` / ``QApp f a``,
4
+ de Bruijn). ``DEFUN`` is a pure lambda term that maps the quoted source to a Scott-encoded
5
+ ``ast.Module`` of ``@interned @dataclass`` closure classes and a root expression. Each ``QLam``
6
+ becomes a dataclass whose fields are its free variables and whose ``__call__`` is the compiled
7
+ beta reduction; each ``QApp`` becomes ``Thunk(callee, argument)``.
8
+
9
+ This module is pure lambda calculus (one of the four strictly separated kinds: codec / sugar /
10
+ runtime / pure-lambda source): every top-level binding is a ``Builder``, written through the
11
+ ``_dsl``/``_sugar``/``_pybuild`` notation with ``_codec`` literal renderings.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from co_lambda._binnat import BIN_EQUAL, BIN_IS_ZERO, BIN_PRED, BIN_SUCC, BIN_ZERO
17
+ from co_lambda._codec import char_codes, int_to_binnat
18
+ from co_lambda._dsl import Builder, app, lam
19
+ from co_lambda._prelude import FALSE, SCOTT_NIL, TRUE, Y
20
+ from co_lambda._pybuild import (
21
+ ex_name,
22
+ field_node,
23
+ field_str,
24
+ name_gensym_field,
25
+ py_annassign,
26
+ py_attribute,
27
+ py_call,
28
+ py_classdef,
29
+ py_module,
30
+ st_assign,
31
+ st_func_def,
32
+ st_return,
33
+ stmt,
34
+ two_nodes,
35
+ )
36
+ from co_lambda._sugar import ap, cons, let, one, pair, pair_first, pair_second, two
37
+
38
+
39
+ # --- BinNat index comparison utilities --------------------------------------------------------
40
+ # De Bruijn indices are BinNat-encoded (O(log n) per index). Comparison uses the _binnat operation
41
+ # which is O(log n), vs O(n) for Church numeral arithmetic.
42
+
43
# Index equality used by MEMBER_IDX and LOOKUP_FIELD_NAME; an alias for ``BIN_EQUAL``.
_EQ_IDX: Builder = BIN_EQUAL
44
+
45
+
46
+ # --- BinNat list utilities --------------------------------------------------------------------
47
+
48
# DECREMENT_POSITIVE: xs -> Scott list.
# Walks ``xs``: an element for which ``BIN_IS_ZERO`` selects its first branch is dropped, and every
# other element is replaced by its ``BIN_PRED``. FREE_VARS applies this to the free indices of a
# QLam body.
DECREMENT_POSITIVE: Builder = app(Y, lam(lambda self_rec: lam(lambda xs: app(
    app(
        xs,
        # cons case: skip a zero head, otherwise emit a literal cons cell of pred(head) and the rest
        lam(lambda head: lam(lambda tail: app(
            app(app(BIN_IS_ZERO, head),
                app(self_rec, tail)),
            lam(lambda c: lam(lambda n: app(app(c, app(BIN_PRED, head)), app(self_rec, tail)))),
        ))),
    ),
    # nil case
    SCOTT_NIL,
))))
59
+
60
# LIST_APPEND: xs -> ys -> Scott list.
# Rebuilds each cons cell of ``xs`` (as a literal two-handler cell) over the recursive result and
# yields ``ys`` once ``xs`` is nil.
LIST_APPEND: Builder = app(Y, lam(lambda self_rec: lam(lambda xs: lam(lambda ys: app(
    app(
        xs,
        # cons case: the cell (h, append(t, ys))
        lam(lambda h: lam(lambda t: lam(lambda c: lam(lambda n: app(app(c, h), ap(self_rec, t, ys)))))),
    ),
    # nil case
    ys,
)))))
67
+
68
# MEMBER_IDX: x -> xs -> Scott boolean.
# Membership of a BinNat index in a BinNat list: TRUE as soon as ``_EQ_IDX`` matches a head,
# otherwise the search continues on the tail; FALSE on the empty list.
MEMBER_IDX: Builder = app(Y, lam(lambda self_rec: lam(lambda x: lam(lambda xs: app(
    app(
        xs,
        # cons case: the equality result selects between TRUE and the recursive search
        lam(lambda head: lam(lambda tail: app(
            app(ap(_EQ_IDX, x, head), TRUE),
            ap(self_rec, x, tail),
        ))),
    ),
    # nil case
    FALSE,
)))))
79
+
80
# ORDERED_UNION: xs -> ys -> Scott list.
# Order-preserving union: ``xs`` followed by each element of ``ys`` not already present, deduplicating
# by FIRST OCCURRENCE. Unlike a sorted merge, this keeps the order in which indices first appear, so a
# capture's field position follows its first use in the compiled body rather than its de Bruijn value.
# The recursion is over ``ys``; ``xs`` acts as the accumulator and is the result once ``ys`` is nil.
ORDERED_UNION: Builder = app(Y, lam(lambda self_rec: lam(lambda xs: lam(lambda ys: app(
    app(
        ys,
        # cons case: keep ``xs`` unchanged if it already holds ``head``, else append ``head`` to it
        lam(lambda head: lam(lambda tail: app(
            app(ap(MEMBER_IDX, head, xs),
                ap(self_rec, xs, tail)),
            ap(self_rec, ap(LIST_APPEND, xs, one(head)), tail),
        ))),
    ),
    # nil case
    xs,
)))))
94
+
95
+
96
+ # --- FREE_VARS: free de Bruijn indices of a quoted term, in first-occurrence order --------------
97
+ # A QApp emits ``Thunk(callee, argument)`` (callee subtree before argument subtree), so the free
98
+ # variables are collected callee-first; a capture's field position then follows the order in which it
99
+ # is first dereferenced in the compiled ``__call__`` body. PROCESS_FREE_VARS, MAKE_BODY_ENV, and
100
+ # MAP_FREE_VARS_TO_ARGS all derive from this one list, so fields, body-env lookups, and constructor
101
+ # arguments share the order automatically; content addressing then merges closures whose bodies use
102
+ # their captures in the same order even when the captured de Bruijn indices differ.
103
+
104
# FREE_VARS: quoted -> Scott list of indices.
# Dispatches on the quoted term by applying it to three handlers, in QVar / QLam / QApp order.
FREE_VARS: Builder = app(Y, lam(lambda self_rec: lam(lambda quoted: ap(
    quoted,
    # QVar index: the one-element list holding ``index``
    lam(lambda index: lam(lambda c: lam(lambda n: app(app(c, index), SCOTT_NIL)))),
    # QLam body: the body's free indices passed through DECREMENT_POSITIVE
    lam(lambda body: app(DECREMENT_POSITIVE, app(self_rec, body))),
    # QApp function argument: function's free indices first, then the argument's new ones
    lam(lambda function: lam(lambda argument: ap(
        ORDERED_UNION, app(self_rec, function), app(self_rec, argument),
    ))),
))))
112
+
113
+
114
+ # --- Name constants for emitted code -----------------------------------------------------------
115
+
116
# Character-code lists (via ``char_codes``) for the identifiers that appear in emitted code.
_INTERNED_CODES: Builder = char_codes("interned")
_LAMBDA_CODES: Builder = char_codes("Lambda")
_SELF_CODES: Builder = char_codes("self")
_A_CODES: Builder = char_codes("a")
_THUNK_CODES: Builder = char_codes("Thunk")
_COMPILED_CODES: Builder = char_codes("compiled")
_CALL_CODES: Builder = char_codes("__call__")

# The same identifiers wrapped as name expressions (``ex_name`` over ``field_str``).
_INTERNED_NAME: Builder = ex_name(field_str(_INTERNED_CODES))
_LAMBDA_NAME: Builder = ex_name(field_str(_LAMBDA_CODES))
_SELF_NAME: Builder = ex_name(field_str(_SELF_CODES))
_THUNK_NAME: Builder = ex_name(field_str(_THUNK_CODES))

# String fields for ``a`` and ``self``; passed together as the ``__call__`` parameter pair, and
# ``_A_FIELD`` also names de Bruijn index 0 in MAKE_BODY_ENV.
_A_FIELD: Builder = field_str(_A_CODES)
_SELF_FIELD: Builder = field_str(_SELF_CODES)

# Kind tags handed to ``name_gensym_field``: one for class names, one for capture field names.
_KIND_CLASS: Builder = int_to_binnat(10)
_KIND_CAPTURE: Builder = int_to_binnat(11)

# The one-element decorator list given to every emitted class: the ``interned`` name.
_DECORATOR_LIST: Builder = one(field_node(_INTERNED_NAME))
136
+
137
+
138
+ # --- PROCESS_FREE_VARS: build capture fields and the body-env lookup list together --------------
139
+ # This processes the free-var list once and produces a pair:
140
+ # (annassign_stmts, -- Scott list of stmt(AnnAssign) fields for the class body
141
+ # field_name_list) -- Scott list of (debruijn_index, field_name) pairs for body-env lookup
142
+ #
143
+ # Capture field names are content-addressable by POSITION ALONE, not by the owning QLam: a class's
144
+ # fields live in the class namespace, so the i-th capture is named identically in every class. This
145
+ # is what makes two closures of the same shape (same arity and same compiled body) but capturing
146
+ # variables at different de Bruijn depths compile to the SAME dataclass: their capture fields, their
147
+ # body env, and hence their whole class body are byte-identical (a coarser equivalence than the
148
+ # source QLam's node identity). The class NAME is then content-addressed by the compiled body (see
149
+ # the QLam case below), so identical bodies share one class.
150
+ #
151
+ # The capture name's payload is the EXACT SAME interned node in both the annotation and the body-env
152
+ # lookup because it is built once per free variable and threaded through both uses.
153
+
154
# PROCESS_FREE_VARS: position -> free_vars -> pair(annassign_stmts, field_name_list).
# ``position`` starts at the caller's value (BIN_ZERO in the QLam case) and is advanced with
# BIN_SUCC for every element of ``free_vars``.
PROCESS_FREE_VARS: Builder = app(Y, lam(lambda self_rec: lam(
    lambda position: lam(lambda free_vars: app(
        app(
            free_vars,
            lam(lambda head: lam(lambda tail: let(
                # The capture name depends only on the kind tag and the position, not on ``head``.
                name_gensym_field(_KIND_CAPTURE, position),
                lambda cap_name: let(
                    # Process the tail at the next position before assembling this element.
                    ap(self_rec, app(BIN_SUCC, position), tail),
                    lambda rest: pair(
                        # First component: this capture's AnnAssign statement, then the tail's.
                        cons(
                            stmt(py_annassign(ex_name(cap_name), _LAMBDA_NAME)),
                            pair_first(rest),
                        ),
                        # Second component: the (index, capture name) pair, then the tail's.
                        cons(
                            pair(head, cap_name),
                            pair_second(rest),
                        ),
                    ),
                ),
            ))),
        ),
        # Empty free-var list: both result lists are empty.
        pair(SCOTT_NIL, SCOTT_NIL),
    )))))
177
+
178
+
179
# LOOKUP_FIELD_NAME: index -> field_name_list -> field_name
# Finds the field name for a given de Bruijn index in the list of (index, name) pairs: the first
# pair whose stored index satisfies ``_EQ_IDX`` wins. If no pair matches, the result is the string
# field "LOOKUP_FAILED" rather than an error.
LOOKUP_FIELD_NAME: Builder = app(Y, lam(lambda self_rec: lam(
    lambda index: lam(lambda name_list: app(
        app(
            name_list,
            lam(lambda head_pair: lam(lambda tail: app(
                # Destructure the pair by applying it to a two-argument handler.
                head_pair,
                lam(lambda stored_index: lam(lambda field_name: app(
                    app(ap(_EQ_IDX, index, stored_index), field_name),
                    ap(self_rec, index, tail),
                ))),
            ))),
        ),
        field_str(char_codes("LOOKUP_FAILED")),
    )))))
195
+
196
+
197
# MAKE_BODY_ENV: field_name_list -> (de Bruijn index -> Python expression)
#   index 0     -> Name("a")
#   index k > 0 -> self.<field_name for k-1 in field_name_list>
# The index is shifted with BIN_PRED before the lookup, so the list is searched for k-1.
MAKE_BODY_ENV: Builder = lam(lambda field_name_list: lam(lambda index: app(
    app(app(BIN_IS_ZERO, index),
        ex_name(_A_FIELD)),
    py_attribute(
        _SELF_NAME,
        ap(LOOKUP_FIELD_NAME, app(BIN_PRED, index), field_name_list),
    ),
)))
208
+
209
# MAP_FREE_VARS_TO_ARGS: free_vars -> env -> Scott list of field_node(env(idx))
# Maps each free index through ``env`` in list order; the QLam case uses the result as the
# argument list of the class constructor call.
MAP_FREE_VARS_TO_ARGS: Builder = app(Y, lam(lambda self_rec: lam(
    lambda free_vars: lam(lambda env: app(
        app(
            free_vars,
            lam(lambda head: lam(lambda tail: cons(
                field_node(app(env, head)),
                ap(self_rec, tail, env),
            ))),
        ),
        SCOTT_NIL,
    )))))
221
+
222
+
223
+ # --- DEFUN_REC: the core compilation recursion --------------------------------------------------
224
+
225
# _DEFUN_REC: quoted -> pair(defs, value)
#   defs  : rest -> statement list; prepends this term's class definitions to ``rest``
#   value : env -> expression; builds this term's expression given ``env``, a function from a
#           de Bruijn index to an expression
# The quoted term is dispatched by applying it to three handlers, in QVar / QLam / QApp order.
_DEFUN_REC: Builder = app(Y, lam(lambda self_rec: lam(lambda quoted: ap(
    quoted,

    # QVar index: no defs, value = env(index)
    lam(lambda index: pair(
        lam(lambda rest: rest),
        lam(lambda env: app(env, index)),
    )),

    # QLam body: class definition + constructor call. Capture fields are positional (self.cap_p),
    # so two QLams of the same shape that capture variables at different de Bruijn depths produce
    # byte-identical class bodies. The provisional class name keys on the source QLam node (giving
    # deterministic names within one compile); the boundary's ``_canonicalize_classes`` then renames
    # every class by the Merkle hash of its COMPILED body, merging the byte-identical ones. This is
    # the coarser, compiled-form content addressing.
    lam(lambda body: let(
        app(self_rec, body),
        lambda compiled_body: let(
            # Free variables are taken of the whole QLam (``quoted``), not of ``body``.
            app(FREE_VARS, quoted),
            lambda free_vars: let(
                ap(PROCESS_FREE_VARS, BIN_ZERO, free_vars),
                lambda processed: let(
                    pair_first(processed),
                    lambda annassigns: let(
                        pair_second(processed),
                        lambda field_name_list: pair(
                            # defs: this class + body's defs
                            lam(lambda rest: cons(
                                stmt(py_classdef(
                                    name_gensym_field(_KIND_CLASS, quoted),
                                    _DECORATOR_LIST,
                                    # class body: the capture fields, then the __call__ method
                                    ap(
                                        LIST_APPEND,
                                        annassigns,
                                        one(stmt(st_func_def(
                                            field_str(_CALL_CODES),
                                            two(_SELF_FIELD, _A_FIELD),
                                            # __call__ returns the body's value under the body env
                                            one(stmt(st_return(ap(
                                                pair_second(compiled_body),
                                                app(MAKE_BODY_ENV, field_name_list),
                                            )))),
                                        ))),
                                    ),
                                )),
                                ap(pair_first(compiled_body), rest),
                            )),

                            # value: ClassName(env(fv_0), env(fv_1), ...)
                            lam(lambda env: py_call(
                                ex_name(name_gensym_field(_KIND_CLASS, quoted)),
                                ap(MAP_FREE_VARS_TO_ARGS, free_vars, env),
                            )),
                        ),
                    ),
                ),
            ),
        ),
    )),

    # QApp f a: Thunk(callee, argument); defs = f's defs ++ a's defs
    lam(lambda function: lam(lambda argument: let(
        app(self_rec, function),
        lambda compiled_f: let(
            app(self_rec, argument),
            lambda compiled_a: pair(
                # defs: f's defs placed in front of a's defs, which sit in front of ``rest``
                lam(lambda rest: ap(
                    pair_first(compiled_f),
                    ap(pair_first(compiled_a), rest),
                )),
                # value: both sub-expressions are built under the same ``env``
                lam(lambda env: py_call(
                    _THUNK_NAME,
                    two_nodes(
                        ap(pair_second(compiled_f), env),
                        ap(pair_second(compiled_a), env),
                    ),
                )),
            ),
        ),
    ))),
))))
305
+
306
+
307
+ # --- DEFUN: top-level entry point ---------------------------------------------------------------
308
+
309
# DEFUN: quoted -> module.
# Compiles the quoted term with _DEFUN_REC, then builds a module whose statements are the collected
# class definitions followed by one assignment of the root expression to ``compiled``.
DEFUN: Builder = lam(lambda quoted: let(
    app(_DEFUN_REC, quoted),
    lambda root: py_module(ap(
        pair_first(root),
        one(stmt(st_assign(
            field_str(_COMPILED_CODES),
            ap(
                pair_second(root),
                # Top-level env: every index maps to the name "UNREACHABLE".
                # NOTE(review): presumably the source is expected to be closed, so this is never
                # consulted — confirm.
                lam(lambda _index: ex_name(field_str(char_codes("UNREACHABLE")))),
            ),
        ))),
    )),
))