llparse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llparse/llparse.py ADDED
@@ -0,0 +1,150 @@
1
+ """"""
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Optional
5
+
6
+ from .C_compiler import CCompiler
7
+ from .frontend import (DEFAULT_MAX_TABLE_WIDTH, DEFAULT_MIN_TABLE_SIZE,
8
+ Frontend, IImplementation, source)
9
+ from .header import HeaderBuilder
10
+
11
+
12
@dataclass
class CompilerResult:
    """Pair of generated artifacts returned by ``Compiler.compile``."""

    # Textual C code
    c: str
    # Textual C header file
    header: str
18
+
19
+ # NOTE: Coming soon... I will be making a settings compiler, attached to CompilerResult, to compile your own
20
+ # settings structures and concatenate them to the existing header file, like in llhttp.
21
+ # I'm also working on adding a Cython .pxd import as well...
22
+
23
+
24
class Compiler:
    """Compile a parser graph into C source text plus its header.

    Parameters
    ----------
    - `prefix` Prefix handed to the frontend and to the header builder.
    - `headerGuard` Passed through to `HeaderBuilder`.
    - `debug` Passed through to `CCompiler`.
    - `maxTableElemWidth` Forwarded to the frontend as the
      `"maxTableElemWidth"` option.
    - `minTableSize` Forwarded to the frontend as the `"minTableSize"` option.
    """

    def __init__(
        self,
        prefix: str,
        headerGuard: Optional[str] = None,
        debug: Optional[str] = None,
        maxTableElemWidth: Optional[int] = None,
        minTableSize: Optional[int] = None,
    ):
        self.prefix = prefix
        self.headerGuard = headerGuard
        self.debug = debug
        self.maxTableElemWidth = maxTableElemWidth
        self.minTableSize = minTableSize

    def to_frontend(
        self,
        root: source.code.Node,
        properties: list[source.Property],
        Impl: Optional[IImplementation] = None,
    ):
        """Run only the frontend pass and return its result.

        Exposed separately so the frontend output can be reused for targets
        other than C.

        Parameters
        ----------
        - `root` Root node of the parser graph.
        - `properties` Properties declared on the parser state.
        - `Impl` Implementation container given to `Frontend`. When omitted
          (or `None`) a fresh `IImplementation()` is created for this call.
        """
        # The default used to be `IImplementation()` in the signature, which
        # Python evaluates once at definition time and then shares between
        # every call; build it lazily so calls cannot leak state into each
        # other.
        if Impl is None:
            Impl = IImplementation()

        frontend = Frontend(
            self.prefix,
            Impl,
            options={
                "maxTableElemWidth": self.maxTableElemWidth,
                "minTableSize": self.minTableSize,
            },
        )
        return frontend.compile(root, properties)

    def compile(
        self,
        root: source.code.Node,
        properties: list[source.Property],
        header_name: Optional[str] = None,
        Impl: Optional[IImplementation] = None,
    ):
        """Create the C file and the header file.

        Parameters
        ----------
        - `root` Root node of the parser graph.
        - `properties` Properties declared on the parser state.
        - `header_name` Passed through to `CCompiler`.
        - `Impl` See `to_frontend`.

        Returns a `CompilerResult` holding the C text and the header text.
        """
        info = self.to_frontend(root, properties, Impl)
        hb = HeaderBuilder(self.prefix, self.headerGuard, properties, info.spans)
        # C source is produced before the header, as before.
        c_text = CCompiler(header_name, self.debug).compile(info)
        return CompilerResult(c_text, hb.build())
72
+
73
+
74
class LLParse(source.Builder):
    """Graph builder that can also compile the graph it built.

    The prefix controls the names of methods and state struct in generated
    public C headers:

    ```c
    // state struct
    struct PREFIX_t {
      ...
    }

    int PREFIX_init(PREFIX_t* state);
    int PREFIX_execute(PREFIX_t* state, const char* p, const char* endp);
    ```

    `prefix` Prefix to be used when generating public API.
    """

    def __init__(self, prefix: str) -> None:
        self.prefix = prefix
        super().__init__()

    def get_compiler(
        self,
        headerGuard: Optional[str] = None,
        debug: Optional[str] = None,
        maxTableElemWidth: Optional[int] = None,
        minTableSize: Optional[int] = None,
    ):
        """Return a `Compiler` configured with this builder's prefix.

        A falsy `maxTableElemWidth` / `minTableSize` (``None`` or ``0``) is
        replaced by `DEFAULT_MAX_TABLE_WIDTH` / `DEFAULT_MIN_TABLE_SIZE`.
        """
        return Compiler(
            self.prefix,
            headerGuard,
            debug,
            maxTableElemWidth if maxTableElemWidth else DEFAULT_MAX_TABLE_WIDTH,
            minTableSize if minTableSize else DEFAULT_MIN_TABLE_SIZE,
        )

    def build(
        self,
        root: source.code.Node,
        headerGuard: Optional[str] = None,
        debug: Optional[str] = None,
        maxTableElemWidth: Optional[int] = None,
        minTableSize: Optional[int] = None,
        header_name: Optional[str] = None,
    ):
        """Compile the graph rooted at `root` into C code.

        Returns the value of `Compiler.compile`, i.e. the C file and the
        header wrapped in a dataclass.
        """
        compiler = self.get_compiler(
            headerGuard, debug, maxTableElemWidth, minTableSize
        )
        return compiler.compile(root, self.properties(), header_name=header_name)

    def to_frontend(
        self,
        root: source.code.Node,
        headerGuard: Optional[str] = None,
        debug: Optional[str] = None,
        maxTableElemWidth: Optional[int] = None,
        minTableSize: Optional[int] = None,
    ):
        """Run only the frontend pass and return its result.

        Escape hatch for generating something other than C (Cython, for
        example) from the frontend output.
        """
        compiler = self.get_compiler(
            headerGuard, debug, maxTableElemWidth, minTableSize
        )
        # `properties` is a method: it has to be called here (as `build`
        # does); passing the bound method itself handed the frontend a
        # callable instead of the property list.
        return compiler.to_frontend(root, self.properties())
@@ -0,0 +1,2 @@
1
+ from ..pybuilder.builder import *
2
+ from ..pybuilder.loopchecker import *
@@ -0,0 +1,318 @@
1
+ from typing import Literal, Optional, Union
2
+
3
+ import pyparse.pybuilder.main_code as code
4
+
5
+ # from pydot import graph_from_dot_data
6
+
7
+
8
# Names of the state-property types understood by the builder.
i8 = "i8"
i16 = "i16"
i32 = "i32"
i64 = "i64"
ptr = "ptr"


PropertyTypes = [i8, i16, i32, i64, ptr]


class Property:
    """A named, typed field of the parser state.

    Parameters
    ----------
    - `ty` One of the names listed in `PropertyTypes`.
    - `name` Field name; must not contain a backslash, a slash or a space.

    Raises
    ------
    `ValueError` (a subclass of the `Exception` raised previously) when `ty`
    is unknown or `name` contains one of the forbidden characters.
    """

    # Characters rejected in a property name.  A single backslash is listed
    # (the previous check only matched two consecutive backslashes and let a
    # lone one through).
    _FORBIDDEN_CHARS = ("\\", "/", " ")

    def __init__(self, ty: str, name: str) -> None:
        if ty not in PropertyTypes:
            raise ValueError(
                f"Can't use property : {ty} Because it is not a valid property type in ['i8' , 'i16' , 'i32' , 'i64' , 'ptr']"
            )
        if any(ch in name for ch in self._FORBIDDEN_CHARS):
            raise ValueError(
                f"Flag/Pointer Name:{name} cannot have spaces or other strange characters in C..."
            )
        self.name = name
        self.ty = ty
30
+
31
+
32
class Creator:
    """API for creating external callbacks and intrinsic operations."""

    def __init__(self) -> None:
        """Nothing to initialise; the creator holds no state."""

    # TODO Make Sure python llparse match api is compatable with 3.10 and above...

    def match(self, name: str):
        """Create an external callback that **has no** `value` argument.

        This callback can be used in all `Invoke` nodes except those that are
        targets of `.select()` method.

        C signature of callback must be:

        ```c
        int name(llparse_t* state, char* p, char* endp)
        ```

        Where `llparse_t` is parser state's type name.

        Parameters
        ----------
        - `name` External function name.
        """
        return code._Match(name)

    def value(self, name: str):
        """Create an external callback that **has** `value` argument.

        This callback can be used only in `Invoke` nodes that are targets of
        `.select()` method.

        C signature of callback must be:

        ```c
        int name(llparse_t* state, char* p, char* endp, int value)
        ```

        Where `llparse_t` is parser state's type name.

        Parameters
        ----------
        - `name` External function name.
        """
        return code.Value(name)

    def span(self, name: str):
        """Create an external span callback.

        This callback can be used only in `Span` constructor.

        The difference is that in typescript it's an Arbitrary Span
        in python it's called SpanCallback to try not to be as confusing...

        C signature of callback must be:

        ```c
        int name(llparse_t* state, char* p, char* endp)
        ```

        NOTE: non-zero return value is treated as resumable error.

        Parameters
        ----------
        - `name` External function name.
        """
        return code._Span(name)

    def store(self, field: str):
        """Intrinsic operation. Stores `value` from `.select()` node into the
        state's property with the name specified by `field`, returns zero.

        ```c
        state[field] = value;
        return 0;
        ```

        Parameters
        ----------
        - `field` Property name
        """
        return code.Store(field)

    def load(self, field: str):
        """Intrinsic operation. Loads and returns state's property with the
        name specified by `field`.

        The value of the property is either truncated or zero-extended to fit
        into 32-bit unsigned integer.

        ```c
        return state[field];
        ```

        Parameters
        ----------
        - `field` Property name.
        """
        return code.Load(field)

    def mulAdd(
        self, field: str, base: int, max: Optional[int] = None, signed: bool = False
    ):
        """Intrinsic operation. Takes `value` from `.select()`, state's
        property with the name `field` and does:

        ```c
        field = state[field];
        field *= options.base;
        field += value;
        state[field] = field;
        return 0;  // or 1 on overflow
        ```

        Return values are:

        - 0 - success
        - 1 - overflow

        Parameters
        ----------
        Unlike in Typescript, the values of `IMulAddOptions` are plain
        arguments here.

        - `field` Property name

        - `base` Value to multiply the property with in the first step

        - `max` Maximum value of the property. If at any point of computation
          the intermediate result exceeds it - `mulAdd` returns 1 (overflow).

        - `signed` If `True` - all arithmetics performed by `mulAdd` will be
          signed. Default value: `False`
        """
        return code.MulAdd(field, base, max, signed)

    def update(self, field: str, value: int):
        """Intrinsic operation. Puts `value` integer into the state's property
        with the name specified by `field`.

        ```c
        state[field] = value;
        return 0;
        ```

        Parameters
        ----------
        - `field` Property name
        - `value` Integer value to be stored into the property.
        """
        return code.Update(field, value)

    def isEqual(self, field: str, value: int):
        """Intrinsic operation. Builds a `code.IsEqual` for `field` and
        `value`.

        NOTE(review): the text that used to be here was a copy of `And`
        (`state[field] &= value`). Going by the name this is presumably an
        equality test, i.e. something like

        ```c
        return state[field] == value;
        ```

        -- confirm against `code.IsEqual`.

        Parameters
        ----------
        - `field` Property name
        - `value` Integer value
        """
        return code.IsEqual(field, value)

    # NOTE : Unlike in typescript lowercase "and" & "or" cannot be used as
    # method names (they are Python keywords), hence `And` / `Or` - Vizonex
    def And(self, field: str, value: int):
        """Intrinsic operation.

        ```c
        state[field] &= value
        return 0;
        ```

        Parameters
        ----------
        - `field` Property name
        - `value` Integer value
        """
        return code.And(field, value)

    def Or(self, field: str, value: int):
        """Intrinsic operation.

        ```c
        state[field] |= value
        return 0;
        ```

        This will allow us to set our own flags at will.

        Parameters
        ----------
        - `field` Property name
        - `value` Integer value
        """
        return code.Or(field, value)

    def test(self, field: str, value: int):
        """Intrinsic operation.

        ```c
        return (state[field] & value) == value ? 1 : 0;
        ```

        Parameters
        ----------
        - `field` Property name
        - `value` Integer value
        """
        return code.Test(field, value)
251
+
252
+
253
+ # NOTE: Nodes and Codes live in the same file, called `main_code`,
254
+ # as a small convenience for the sake of portability: I did not want
255
+ # to cause hell for anyone who needs to move files to another folder
256
+ # quickly. This was done during development
257
+ # so that there was no need to
258
+ # open too many IDEs and numerous windows - Vizonex
259
+ # So node.Match in TypeScript is code.Match in Python...
260
+
261
+
262
+ # TODO (Vizonex) Add more Documentation later , I got tired of it...
263
class Builder:
    """Factory for graph nodes plus a registry of parser-state properties."""

    def __init__(self) -> None:
        # API for creating external callbacks and intrinsic operations.
        self.code = Creator()
        self.transform = code.TransfromCreator()
        # Declared properties, keyed by name.
        self.privProperties: dict[str, Property] = {}

    def node(self, name: str):
        """Return `code.Match(name)`."""
        return code.Match(name)

    def error(self, errorCode: int, reason: str):
        """Return `code.Error(errorCode, reason)`."""
        return code.Error(errorCode, reason)

    def invoke(
        self,
        fn: code.Code,
        Map: Union[dict[int, code.Node], code.Node, None] = None,
        otherwise: Optional[code.Node] = None,
    ):
        """Return a `code.Invoke` for `fn`.

        `Map` may be a dict of edges, or a single node, in which case that
        node is used as `otherwise` and the invoke gets no mapped edges.
        A truthy `otherwise` is attached via `.otherwise()`.
        """
        edges = {}
        if Map:
            if isinstance(Map, code.Node):
                # A bare node in the map position means "otherwise".
                otherwise = Map
            else:
                edges = Map

        invoke_node = code.Invoke(fn, edges)
        if otherwise:
            invoke_node.otherwise(otherwise)
        return invoke_node

    def consume(self, field: str):
        """Return `code.Comsume(field)` (spelled that way in `main_code`)."""
        return code.Comsume(field)

    def pause(self, errorCode: int, reason: str):
        """Return `code.Pause(errorCode, reason)`."""
        return code.Pause(errorCode, reason)

    # NOTE SpanCallback can really be any node, just needed to clarify that - Vizonex
    def span(self, callback: code._Span):
        """Return `code.Span(callback)`."""
        return code.Span(callback)

    def property(self, ty: Literal["i8", "i16", "i32", "i64", "ptr"], name: str):
        """Declare a state property called `name` of type `ty`.

        Raises `TypeError` for a type outside `PropertyTypes` and
        `RuntimeError` when `name` was declared before.
        """
        type_is_known = ty in PropertyTypes
        if not type_is_known:
            raise TypeError(f"ty:{ty} is not an existing Parser Property")

        already_declared = self.privProperties.get(name)
        if already_declared:
            raise RuntimeError(f"Duplicate property with name:{name}")

        self.privProperties[name] = Property(ty, name)

    def properties(self) -> list[Property]:
        """Return list of all allocated properties in parser's state."""
        return [*self.privProperties.values()]
@@ -0,0 +1,246 @@
1
+ from typing import Any, Literal, Union
2
+
3
+ from ..pybuilder.main_code import Node
4
+
5
MAX_VALUE = 256
WORD_SIZE = 32
SIZE = MAX_VALUE // WORD_SIZE
WORD_FILL = -1 | 0

assert MAX_VALUE % WORD_SIZE == 0


# NOTE: It is theoretically possible to move the Lattice class into Cython
# as an optional replacement for the sake of speed in the near future, once
# everything is understood from a deeper point of view - Vizonex


class Lattice:
    """Set of byte values ``0 .. MAX_VALUE - 1`` stored as a bitmap.

    The bitmap is `SIZE` integers (`words`), each carrying `WORD_SIZE` bits.

    Parameters
    ----------
    - `value` Either the sentinel ``"empty"`` (no bits set), the sentinel
      ``"any"`` (every bit set), or an iterable of members. Members may be
      integers or one-character strings (converted with `ord`).

    `value` is kept on the instance exactly as passed in; lattices returned
    by `union`, `intersect` and `subtract` are created from ``"empty"`` and
    therefore report ``"empty"`` there whatever they contain. Use `isEqual`
    or `toJSON` to inspect contents.
    """

    # Unsigned bit pattern of a completely filled word. Python integers are
    # unbounded, so the signed `WORD_FILL` (-1) never compares equal to a
    # word whose bits were set one at a time; every comparison is therefore
    # done on words masked with this value.
    _FULL_WORD = WORD_FILL & ((1 << WORD_SIZE) - 1)

    def __init__(
        self, value: Union[Any, list[int], Literal["empty"], Literal["any"]]
    ) -> None:
        self.value = value

        is_any = isinstance(value, str) and value == "any"
        is_empty = isinstance(value, str) and value == "empty"

        self.words: list[int] = [self._FULL_WORD if is_any else 0] * SIZE

        # The sentinels describe the whole set; their characters are not
        # members.
        if is_any or is_empty:
            return

        # Every element counts, including the only element of a
        # one-item list.
        for single in value:
            self.add(single)

    def __iter__(self):
        """Yield the members in ascending order."""
        for i in range(MAX_VALUE):
            if self.check(i):
                yield i

    @staticmethod
    def _locate(bit: int) -> "tuple[int, int]":
        """Return ``(word index, bit offset)`` for `bit`.

        Raises `AssertionError` when `bit` is outside ``0 .. MAX_VALUE - 1``.
        """
        if not 0 <= bit < MAX_VALUE:
            raise AssertionError("Invalid Bit")
        return divmod(bit, WORD_SIZE)

    def check(self, bit: int):
        """Return True when `bit` is a member."""
        index, off = self._locate(bit)
        return (self.words[index] & (1 << off)) != 0

    def add(self, bit: int):
        """Add `bit` (an integer or a one-character string) to the set."""
        if isinstance(bit, str):
            bit = ord(bit)
        index, off = self._locate(bit)
        self.words[index] |= 1 << off

    def union(self, other: "Lattice") -> "Lattice":
        """Return a new lattice with the members of either operand."""
        result = Lattice("empty")
        result.words = [
            (a | b) & self._FULL_WORD for a, b in zip(self.words, other.words)
        ]
        return result

    def intersect(self, other: "Lattice") -> "Lattice":
        """Return a new lattice with the members common to both operands."""
        result = Lattice("empty")
        result.words = [
            (a & b) & self._FULL_WORD for a, b in zip(self.words, other.words)
        ]
        return result

    def subtract(self, other: "Lattice") -> "Lattice":
        """Return a new lattice with the members of `self` not in `other`."""
        result = Lattice("empty")
        # `~b` is negative in Python; masking keeps the word unsigned.
        result.words = [
            (a & ~b) & self._FULL_WORD for a, b in zip(self.words, other.words)
        ]
        return result

    def isEqual(self, other: "Lattice"):
        """Return True when both lattices contain exactly the same members.

        The comparison is made on the bitmaps, not on the constructor
        arguments, so derived lattices compare by content.
        """
        mask = self._FULL_WORD
        return all(
            (a & mask) == (b & mask) for a, b in zip(self.words, other.words)
        )

    def toJSON(self):
        """Return ``"empty"``, ``"any"`` or the sorted list of members."""
        mask = self._FULL_WORD
        words = [word & mask for word in self.words]
        if not any(words):
            return "empty"
        if all(word == mask for word in words):
            return "any"
        return list(self)
94
+
95
+
96
class Reachability:
    """Collect every node that can be reached from a root node."""

    def build(self, root: Node) -> list[Node]:
        """Return all nodes reachable from `root`, each exactly once.

        Nodes are followed through the edges obtained by iterating a node and
        through its `getOtherwiseEdge()` edge. The result is listed in order
        of discovery, starting with `root`, so it is the same on every run
        (collecting into a `set` made the order depend on hash values).
        """
        # dict keys keep insertion order, which makes this an ordered set.
        seen: dict[Node, None] = {}
        pending = [root]
        while pending:
            node = pending.pop()
            if node in seen:
                continue
            seen[node] = None

            for edge in node:
                pending.append(edge.node)

            otherwise = node.getOtherwiseEdge()
            if otherwise:
                pending.append(otherwise.node)

        return list(seen)
115
+
116
+
117
EMPTY_VALUE = Lattice("empty")
ANY_VALUE = Lattice("any")


class LoopChecker:
    """Verify that a graph has no cycle made only of no-advance edges.

    For every node the checker tracks, as a `Lattice`, which input values may
    arrive at it without any input having been consumed. A cycle that can be
    walked with a non-empty value would never make progress and is reported
    as an error.
    """

    def __init__(self) -> None:
        # Values that may reach each node during the current pass.
        self.lattice: dict[Node, Lattice] = {}
        # Per node: first characters of the keys on its advancing edges.
        self.terminatedCache: dict[Node, Lattice] = {}

    @staticmethod
    def _passes_everything(edge) -> bool:
        """True for edges without a key or with an integer key.

        Such edges let the whole incoming value through; any other key is
        narrowed to its first element (`edge.key[0]`).
        """
        return edge.key is None or isinstance(edge.key, int)

    def clear(self, nodes: list[Node]):
        """Reset the lattice value of every node in `nodes` to empty."""
        for node in nodes:
            self.lattice[node] = EMPTY_VALUE

    def check(self, root: Node):
        """Run the loop check on the graph reachable from `root`.

        Raises `Exception` when a loop is detected.
        """
        nodes = Reachability().build(root)
        for node in nodes:
            # Start every pass from a clean slate, then assume that any
            # value may arrive at the node under inspection.
            self.clear(nodes)
            self.lattice[node] = ANY_VALUE

            # Raise lattice values until nothing changes any more. The work
            # list starts with `node`: it is the only node holding a
            # non-empty value, so it is the only one that can propagate.
            changed: set[Node] = {node}
            while changed:
                pending: set[Node] = set()
                for changedNode in changed:
                    self.propagate(changedNode, pending)
                changed = pending

            # Walk the reachable no-advance edges to find a cycle.
            self.visit(node, [])

    def propagate(self, node: Node, changed: set[Node]):
        """Push the value of `node` along its no-advance edges.

        Nodes whose value grew are added to `changed`.
        """
        value = self.lattice[node]

        # Values consumed by advancing edges stop here.
        terminated: "Lattice" = self.terminate(node, value, changed)
        if not terminated.isEqual(EMPTY_VALUE):
            value = value.subtract(terminated)
            if value.isEqual(EMPTY_VALUE):
                return

        keysbyTarget: dict[Node, Lattice] = {}

        for edge in node.getAllEdges():
            if not edge.noAdvance:
                continue

            target = edge.node
            if target in keysbyTarget:
                targetValue = keysbyTarget[target]
            else:
                targetValue = self.lattice.get(target, EMPTY_VALUE)

            if self._passes_everything(edge):
                targetValue = targetValue.union(value)
            else:
                # Keyed no-advance edge: only the key's first element passes.
                edgeValue = Lattice([edge.key[0]]).intersect(value)
                if edgeValue.isEqual(EMPTY_VALUE):
                    continue
                targetValue = targetValue.union(edgeValue)

            keysbyTarget[target] = targetValue

        for child, childValue in keysbyTarget.items():
            self.update(child, childValue, changed)

    def update(self, node: Node, newValue: Lattice, changed: set[Node]):
        """Store `newValue` for `node`; return True when it differed."""
        value = self.lattice[node]
        if newValue.isEqual(value):
            return False
        self.lattice[node] = newValue
        changed.add(node)
        return True

    def terminate(self, node: Node, value: Lattice, changed: set[Node]):
        """Return (and cache) the values consumed by `node`'s advancing edges.

        `value` and `changed` are accepted for signature compatibility and
        are not used.
        """
        if node in self.terminatedCache:
            return self.terminatedCache[node]

        terminated: list[int] = []
        for edge in node.getAllEdges():
            if edge.noAdvance:
                continue
            if self._passes_everything(edge):
                continue
            terminated.append(edge.key[0])

        result = Lattice(terminated)
        self.terminatedCache[node] = result
        return result

    def visit(self, node: Node, path: list[Node]):
        """Follow reachable no-advance edges from `node` looking for a cycle.

        `path` lists the nodes entered so far on the current walk. Raises
        `Exception` when an edge leads back to a node already on `path`.
        """
        value = self.lattice[node]

        terminated = self.terminatedCache.get(node, EMPTY_VALUE)
        if not terminated.isEqual(EMPTY_VALUE):
            value = value.subtract(terminated)
            if value.isEqual(EMPTY_VALUE):
                return

        for edge in node.getAllEdges():
            # Only edges that do not consume input can form a stuck loop.
            if not edge.noAdvance:
                continue

            edgeValue = value
            if not self._passes_everything(edge):
                edgeValue = edgeValue.intersect(Lattice([edge.key[0]]))

            # Ignore edges that cannot be taken with any remaining value.
            if edgeValue.isEqual(EMPTY_VALUE):
                continue

            if edge.node in path:
                chain = " -> ".join('"' + step.name + '"' for step in path)
                raise Exception(
                    f'Detected loop in "{edge.node.name}" through chain {chain}'
                )

            # Build a new list: the caller's path must stay untouched for
            # its remaining edges (`list.extend` mutates and returns None).
            self.visit(edge.node, path + [edge.node])