llparse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llparse/frontend.py ADDED
@@ -0,0 +1,527 @@
1
+ from dataclasses import dataclass, field
2
+ from typing import Literal, Optional, Union
3
+
4
+ from .enumerator import Enumerator
5
+ from .pybuilder import LoopChecker
6
+ from .pybuilder import builder as source
7
+ # from pyfront.namespace import code, node , transform
8
+ from .pyfront import namespace as _frontend
9
+ from .pyfront.front import Identifier, IWrap, SpanField
10
+ from .pyfront.implementation import IImplementation
11
+ from .pyfront.nodes import ITableEdge
12
+ from .pyfront.peephole import Peephole
13
+ from .spanalloc import SpanAllocator
14
+ from .trie import Trie, TrieEmpty, TrieNode, TrieSequence, TrieSingle
15
+
16
# Fallback for the "minTableSize" option: Frontend.maybeTableLookup() gives up
# on a table lookup when a trie node has fewer children than this.
DEFAULT_MIN_TABLE_SIZE = 32
# Fallback for the "maxTableElemWidth" option: a table lookup is rejected when
# the number of distinct targets reaches ``1 << maxTableElemWidth``.
DEFAULT_MAX_TABLE_WIDTH = 4


# Shorthand for frontend nodes / codes wrapped in an IWrap.
WrappedNode = IWrap[_frontend.node.Node]
WrappedCode = IWrap[_frontend.code.Code]
23
+
24
+
25
+
26
+
27
@dataclass
class ITableLookupTarget:
    """Group of trie keys that share one destination, as collected by
    ``Frontend.maybeTableLookup`` while it builds a table-lookup node."""

    # Trie leaf that was stored for this destination when it was first seen.
    trie: TrieEmpty
    # ``noAdvance`` flag copied from the trie child that introduced the target.
    noAdvance: bool
    # Keys of the trie children collected for this target.
    keys: list[int] = field(default_factory=list)
32
+
33
+
34
+ # TODO (Vizonex) Enable logging to diagnose bigger issues whenever logging is enabled by the User
35
+
36
+ # For those who care...
37
+ # The Original Build Time: 5 - 7 Hours (Including translation of TypeScript libraries)
38
+ # about 15 testruns so far...
39
+ # FrontEnd Code Time : 2 Days with several 4 - 5 Hour Sessions...
40
+
41
+ # Hardest Part: Figuring out my otherwise block all the way inside of the builder was the problem:
42
+ # Causing me to go back and forth for 8 hours until I found the problem in the builder itself. :(
43
+
44
+ # Second Hardest Part: Implementing Span allocator due to anonymous
45
+ # allocate Function being difficult to implement
46
+ # due to "=>" key
47
+
48
+ # Most Interesting Part "so far" - for me (Vizonex): translating llparse's builder module and this frontend
49
+ # I think the C compiler will be a lot more interesting than this, somewhat...
50
+
51
+ # I Think I could use the frontend to help gather span api calls
52
+ # and other callbacks To Then make a compilable Settings
53
+ # API like in llhttp but instead
54
+ # having api.h be compilable with all the little macro Span
55
+ # Callbacks as well , Cython .pxd Compiler could be called
56
+ # afterwards to handle all properties and settings
57
+
58
+ # Allow me to leave you with this Quote by me
59
+
60
+ # "If You want to accomplish something, do it yourself" - Vizonex
61
+
62
+
63
+ # TODO (Vizonex) Make a Mini python enum to C enum Compiler as a Cool Demo...
64
+
65
+ # (WARNING!) I Plan to Drop 3.9 Support Later this Summer (2023)....
66
+ # I will work on a numeric conv Vulnerability Str to int cap Bypass When I upgrade...
67
+
68
+
69
@dataclass
class IFrontendResult:
    """Value returned by ``Frontend.compile``."""

    # Prefix the Frontend was constructed with.
    prefix: str
    # Root of the translated (and peephole-optimized) node graph.
    root: IWrap[_frontend.node.Node]
    # Property list handed to ``Frontend.compile``, passed through unchanged.
    properties: list[source.Property] = field(default_factory=list)
    # One span field per entry of the span allocator's ``concurrency`` list.
    spans: list[_frontend.node.SpanField] = field(default_factory=list)
    # Nodes collected by ``Frontend.registerNode`` plus the root itself.
    resumptionTargets: set[IWrap[_frontend.node.Node]] = field(default_factory=set)
76
+
77
+
78
@dataclass
class IFrontendOptions:
    """Typed container for the table-lookup tuning options.

    NOTE(review): nothing in the visible code instantiates this class;
    ``Frontend.__init__`` takes a plain dict keyed by ``"maxTableElemWidth"``
    and ``"minTableSize"`` instead -- confirm whether this is still needed.
    """

    maxTableWidth: int
    minTableSize: int
82
+
83
+
84
# Accumulator of every wrapped node created while translating one Match node.
MatchChildren = list[WrappedNode]
# A translated Match is either a single wrapped node or the list of its children.
MatchResult = Union[WrappedNode, list[WrappedNode]]
86
+
87
+
88
class Frontend:
    """Translate a builder (``source``) node graph into wrapped frontend nodes.

    ``compile`` runs the loop checker and the span allocator on the builder
    graph, translates every node/code through ``implementation``, applies the
    peephole optimizer and collects the resumption targets.
    """

    def __init__(
        self,
        prefix: str,
        implementation: Optional[IImplementation] = None,
        options: Optional[
            dict[Literal["maxTableElemWidth", "minTableSize"], int]
        ] = None,
    ) -> None:
        """Create a frontend.

        Args:
            prefix: Prepended to every generated node (``__n_``) and code
                (``__c_``) identifier.
            implementation: Wrapper factories for nodes, codes and transforms.
                A fresh ``IImplementation()`` is created when omitted, so
                instances no longer share one default object.
            options: Optional overrides for ``"maxTableElemWidth"`` and
                ``"minTableSize"``; missing keys fall back to the module
                defaults.

        Raises:
            AssertionError: If ``maxTableElemWidth`` is not strictly positive.
        """
        if options is None:
            options = {}

        self.prefix = prefix
        self.Id = Identifier(self.prefix + "__n_")
        self.codeId = Identifier(self.prefix + "__c_")
        # Builder node -> wrapped frontend node; also breaks cycles in translate().
        self.Map: dict[source.code.Node, IWrap[_frontend.node.Node]] = {}
        self.spanMap: dict[source.code.Span, SpanField] = {}
        # cacheKey -> wrapped code, so equal codes are shared.
        self.codeCache: dict[str, WrappedCode] = {}
        self.resumptionTargets: set[WrappedNode] = set()
        self.implementation = (
            IImplementation() if implementation is None else implementation
        )
        self.options: dict[Literal["maxTableElemWidth", "minTableSize"], int] = {
            "maxTableElemWidth": options.get(
                "maxTableElemWidth", DEFAULT_MAX_TABLE_WIDTH
            ),
            "minTableSize": options.get("minTableSize", DEFAULT_MIN_TABLE_SIZE),
        }

        # The message promises "positive", so zero has to be rejected as well.
        if self.options["maxTableElemWidth"] <= 0:
            raise AssertionError(
                "Invalid `options.maxTableElemWidth`, must be positive"
            )

    @staticmethod
    def _debug(message: str, *args) -> None:
        """Emit a debug-level log record (silent unless logging is configured)."""
        import logging

        logging.getLogger(__name__).debug(message, *args)

    def compile(
        self,
        root: source.code.Node,
        properties: Optional[list[source.Property]] = None,
    ):
        """Translate the graph rooted at ``root`` and return an ``IFrontendResult``.

        Args:
            root: Root builder node.
            properties: Property list passed through to the result; a new
                empty list is used when omitted.
        """
        if properties is None:
            properties = []

        lc = LoopChecker()
        lc.check(root)

        spanAllocator = SpanAllocator()
        sourceSpans = spanAllocator.allocate(root)

        # One SpanField per concurrency group; every builder span in the
        # group maps to that same field.
        spans: list[SpanField] = []
        for index, concurrent in enumerate(sourceSpans.concurrency):
            span = SpanField(
                index, [self.translateCode(c.callback) for c in concurrent]
            )
            for sourceSpan in concurrent:
                self.spanMap[sourceSpan] = span
            spans.append(span)

        out = self.translate(root)

        enumerator = Enumerator()
        nodes = enumerator.getAllNodes(out)

        peephole = Peephole()
        out = peephole.optimize(out, nodes)

        # The optimizer may have changed the graph, so enumerate again.
        nodes = enumerator.getAllNodes(out)

        # The root itself is always a resumption target.
        self.resumptionTargets.add(out)
        for node in nodes:
            self.registerNode(node)

        return IFrontendResult(
            prefix=self.prefix,
            properties=properties,
            resumptionTargets=self.resumptionTargets,
            root=out,
            spans=spans,
        )

    def translateMatch(self, node: source.code.Match) -> MatchResult:
        """Build the trie of a Match node and translate it.

        Returns a single wrapped ``Empty`` node when the trie builder yields
        nothing, otherwise the non-empty list of nodes created for the trie.
        """
        trie = Trie(node.name)
        assert node.getOtherwiseEdge(), f'Node "{node.name}" has no ".otherwise()"'
        trieNode = trie.build(list(node))

        if not trieNode:
            return self.implementation.node.Empty(
                _frontend.node.Empty(self.Id.id(node.name))
            )

        children: MatchChildren = []
        self.translateTrie(node, trieNode, children)
        assert children

        return children

    def registerNode(self, node: WrappedNode):
        """Add ``node`` (or its successor) to the resumption targets.

        Consume/Empty/Sequence/Single/TableLookup nodes are registered
        themselves; for Pause and SpanEnd the ``otherwise`` target is
        registered instead.
        """
        # NOTE(review): SpanStart is not listed here -- verify against the
        # upstream llparse frontend whether that omission is intended.
        if isinstance(
            node.ref,
            (
                _frontend.node.Consume,
                _frontend.node.Empty,
                _frontend.node.Sequence,
                _frontend.node.Single,
                _frontend.node.TableLookup,
            ),
        ):
            self.resumptionTargets.add(node)
        elif isinstance(node.ref, (_frontend.node.Pause, _frontend.node.SpanEnd)):
            self.resumptionTargets.add(node.ref.otherwise.node)

    def translate(self, node: source.code.Node):
        """Translate one builder node (recursively) into a wrapped frontend node.

        Results are memoised in ``self.Map``.

        Raises:
            Exception: For an unknown node type, or a Match node without an
                ``otherwise`` edge.
        """
        cached = self.Map.get(node)
        if cached is not None:
            return cached

        def ID():
            return self.Id.id(node.name)

        nodeImpl = self.implementation.node

        if isinstance(node, source.code.Error):
            result = nodeImpl.Error(_frontend.node.Error(ID(), node.code, node.reason))

        elif isinstance(node, source.code.Pause):
            # Must be a frontend Pause (not Error), otherwise registerNode()
            # never recognises the node and its resumption target is lost.
            result = nodeImpl.Pause(_frontend.node.Pause(ID(), node.code, node.reason))

        elif isinstance(node, source.code.Comsume):
            result = nodeImpl.Consume(_frontend.node.Consume(ID(), node.field))

        elif isinstance(node, source.code.SpanStart):
            result = nodeImpl.SpanStart(
                _frontend.node.SpanStart(
                    ID(),
                    self.spanMap[node.span],
                    self.translateSpanCode(node.span.callback),
                )
            )

        elif isinstance(node, source.code.SpanEnd):
            result = nodeImpl.SpanEnd(
                _frontend.node.SpanEnd(
                    ID(),
                    self.spanMap[node.span],
                    self.translateSpanCode(node.span.callback),
                )
            )

        elif isinstance(node, source.code.Invoke):
            assert node.code.signature in ["match", "value"], (
                "Passing `span` callback to `invoke` is not allowed"
            )
            result = nodeImpl.Invoke(
                _frontend.node.Invoke(ID(), self.translateCode(node.code))
            )

        elif isinstance(node, source.code.Match):
            result = self.translateMatch(node)
        else:
            raise Exception(f'Unknown Node Type for :"{node.name}" {type(node)}')

        otherwise = node.getOtherwiseEdge()

        if isinstance(result, list):
            # Only translateMatch() produces a list of trie nodes.
            assert isinstance(node, source.code.Match)

            if not otherwise:
                raise Exception(f'Node "{node.name}" has no ".otherwise()"')

            # Every trie node without its own fallback gets the Match's one.
            for child in result:
                if not child.ref.otherwise:
                    child.ref.setOtherwise(
                        self.translate(otherwise.node), otherwise.noAdvance
                    )

            transform = self.translateTransform(node.getTransform())
            for child in result:
                child.ref.setTransform(transform)

            assert len(result) >= 1
            return result[0]

        single: WrappedNode = result
        assert isinstance(single.ref, _frontend.node.Node)

        # Register before recursing so that cycles terminate.
        self.Map[node] = single

        if otherwise is not None:
            single.ref.setOtherwise(
                self.translate(otherwise.node), otherwise.noAdvance
            )
        else:
            assert isinstance(node, source.code.Error), (
                f'Node "{node.name}" has no `.otherwise()'
            )

        if isinstance(single.ref, _frontend.node.Invoke):
            for edge in node:
                single.ref.addEdge(
                    ord(edge.key) if isinstance(edge.key, str) else edge.key,
                    self.translate(edge.node),
                )
        else:
            assert len(list(node)) == 0

        return single

    def maybeTableLookup(
        self, node: source.code.Match, trie: TrieSingle, children: MatchChildren
    ):
        """Try to turn ``trie`` into a single table-lookup node.

        The optimisation applies only when the trie has at least
        ``minTableSize`` children, every child is a leaf (``TrieEmpty``) that
        carries no value, children sharing a target agree on ``noAdvance``,
        and there are fewer than ``1 << maxTableElemWidth`` distinct targets.

        Returns:
            The wrapped table-lookup node (also appended to ``children``), or
            ``None`` when the optimisation does not apply.
        """
        if len(trie.children) < self.options["minTableSize"]:
            return None

        # Group the keys by the builder node they lead to.
        targets: dict[source.code.Node, ITableLookupTarget] = {}
        for child in trie.children:
            if not isinstance(child.node, TrieEmpty):
                self._debug(
                    'non-leaf trie child of "%s" prevents table allocation',
                    node.name,
                )
                return None

            empty: TrieEmpty = child.node
            # Values cannot be passed through a table edge.
            if getattr(empty, "value", None) is not None:
                self._debug(
                    'value passing trie leaf of "%s" prevents table allocation',
                    node.name,
                )
                return None

            target = empty.node
            existing = targets.get(target)
            if existing is None:
                targets[target] = ITableLookupTarget(
                    keys=[child.key], noAdvance=child.noAdvance, trie=empty
                )
                continue

            if existing.noAdvance != child.noAdvance:
                self._debug(
                    'noAdvance mismatch in a trie leaf of "%s" prevents '
                    "table allocation",
                    node.name,
                )
                return None

            existing.keys.append(child.key)

        # Width limit for this optimisation.
        if len(targets) >= (1 << self.options["maxTableElemWidth"]):
            self._debug(
                'too many different trie targets of "%s" for a table allocation',
                node.name,
            )
            return None

        table = self.implementation.node.TableLookup(
            _frontend.node.TableLookup(self.Id.id(node.name))
        )
        children.append(table)

        # Break loops: the first node created for a Match represents it.
        if node not in self.Map:
            self.Map[node] = table

        for target in targets.values():
            _next = self.translateTrie(node, target.trie, children)
            table.ref.addEdge(
                ITableEdge(keys=target.keys, noAdvance=target.noAdvance, node=_next)
            )

        self._debug('optimized "%s" to a table lookup node', node.name)
        return table

    def translateSequence(
        self, node: source.code.Match, trie: TrieSequence, children: MatchChildren
    ) -> IWrap[_frontend.node.Match]:
        """Translate a ``TrieSequence`` into a wrapped Sequence node."""
        sequence = self.implementation.node.Sequence(
            _frontend.node.Sequence(self.Id.id(node.name), trie.select)
        )
        children.append(sequence)

        # Break loops: the first node created for a Match represents it.
        if node not in self.Map:
            self.Map[node] = sequence

        childNode = self.translateTrie(node, trie.child, children)
        value = trie.child.value if isinstance(trie.child, TrieEmpty) else None
        sequence.ref.setEdge(childNode, value)

        return sequence

    def translateTrie(
        self, node: source.code.Match, trie: TrieNode, children: MatchChildren
    ):
        """Dispatch on the trie node kind.

        Raises:
            TypeError: If ``trie`` is not a TrieEmpty, TrieSingle or
                TrieSequence.
        """
        if isinstance(trie, TrieEmpty):
            # A leaf can only be reached after some node was created for the Match.
            assert node in self.Map
            return self.translate(trie.node)
        elif isinstance(trie, TrieSingle):
            return self.translateSingle(node, trie, children)
        elif isinstance(trie, TrieSequence):
            return self.translateSequence(node, trie, children)
        else:
            raise TypeError("Unknown trie node")

    def translateSingle(
        self, node: source.code.Match, trie: TrieSingle, children: MatchChildren
    ):
        """Translate a ``TrieSingle`` into a table lookup or a Single node."""
        maybeTable = self.maybeTableLookup(node, trie, children)
        if maybeTable is not None:
            return maybeTable

        single = self.implementation.node.Single(
            _frontend.node.Single(self.Id.id(node.name))
        )
        children.append(single)

        # Break loops: the first node created for a Match represents it.
        if node not in self.Map:
            self.Map[node] = single

        for child in trie.children:
            childNode = self.translateTrie(node, child.node, children)
            single.ref.addEdge(
                key=child.key,
                noAdvance=child.noAdvance,
                node=childNode,
                value=child.node.value if isinstance(child.node, TrieEmpty) else None,
            )

        otherwise = trie.otherwise
        if otherwise:
            single.ref.setOtherwise(
                self.translateTrie(node, otherwise, children), True, otherwise.value
            )
        return single

    def translateSpanCode(self, code: source.code._Span):
        """Translate a span callback; thin wrapper around ``translateCode``."""
        return self.translateCode(code)

    def translateCode(self, code: source.code.Code):
        """Translate a builder code object into a wrapped frontend code.

        Codes with an equal ``cacheKey`` share one wrapped instance.

        Raises:
            Exception: If the code type is not supported.
        """
        prefixed = self.codeId.id(code.name).name
        codeImpl = self.implementation.code

        if isinstance(code, source.code.IsEqual):
            res = codeImpl.IsEqual(
                _frontend.code.IsEqual(prefixed, code.field, code.value)
            )

        elif isinstance(code, source.code.Load):
            res = codeImpl.Load(_frontend.code.Load(prefixed, code.field))

        elif isinstance(code, source.code.MulAdd):
            m = _frontend.code.MulAdd(
                prefixed,
                code.field,
                _frontend.code.IMulAddOptions(
                    code.options.base, code.options.max, code.options.signed
                ),
            )
            res = codeImpl.MulAdd(m)

        elif isinstance(code, source.code.And):
            res = codeImpl.And(_frontend.code.And(prefixed, code.field, code.value))

        elif isinstance(code, source.code.Or):
            res = codeImpl.Or(_frontend.code.Or(prefixed, code.field, code.value))

        elif isinstance(code, source.code.Store):
            # Use the prefixed identifier like every other field-based code.
            res = codeImpl.Store(_frontend.code.Store(prefixed, code.field))

        elif isinstance(code, source.code.Test):
            res = codeImpl.Test(_frontend.code.Test(prefixed, code.field, code.value))

        elif isinstance(code, source.code.Update):
            res = codeImpl.Update(
                _frontend.code.Update(prefixed, code.field, code.value)
            )

        # External callbacks keep the user-supplied name.
        elif isinstance(code, source.code._Span):
            res = codeImpl.Span(_frontend.code.Span(code.name))
        elif isinstance(code, source.code._Match):
            res = codeImpl.Match(_frontend.code.Match(code.name))
        elif isinstance(code, source.code.Value):
            res = codeImpl.Value(_frontend.code.Value(code.name))

        else:
            raise Exception(f'UnSupported code:"{code.name}" type: "{type(code)}"')

        # Return the first instance registered under this cache key.
        return self.codeCache.setdefault(res.ref.cacheKey, res)

    def translateTransform(
        self, transform: Optional[source.code.Transform]
    ) -> IWrap[
        Union[
            _frontend.transform.Transform,
            _frontend.transform.ID,
            _frontend.transform.ToLower,
            _frontend.transform.ToLowerUnsafe,
        ]
    ]:
        """Translate a builder transform; ``None`` maps to the identity.

        Raises:
            Exception: If the transform name is not ``"id"``, ``"to_lower"``
                or ``"to_lower_unsafe"``.
        """
        transformImpl = self.implementation.transform
        if not transform or transform.name == "id":
            return transformImpl.ID(_frontend.transform.ID())
        elif transform.name == "to_lower":
            return transformImpl.ToLower(_frontend.transform.ToLower())
        elif transform.name == "to_lower_unsafe":
            return transformImpl.ToLowerUnsafe(_frontend.transform.ToLowerUnsafe())

        # Previously fell through and returned None.
        raise Exception(f'Unsupported transform: "{transform.name}"')
llparse/header.py ADDED
@@ -0,0 +1,89 @@
1
+ """
2
+ The Second to Final Part I translated to python
3
+ translated from typescript
4
+
5
+ This module helps with building Header Files or `.h` files
6
+ In C these can help with defining important definitions
7
+
8
+ """
9
+
10
+ from typing import Optional
11
+
12
+ from .frontend import SpanField
13
+ from .pybuilder import Property
14
+
15
+
16
class HeaderBuilder:
    """Render the C header (``.h``) text for a generated parser."""

    # Builder property type -> C type used for the struct field.
    _C_TYPES = {
        "i8": "uint8_t",
        "i16": "uint16_t",
        "i32": "uint32_t",
        "i64": "uint64_t",
        "ptr": "void*",
    }

    def __init__(
        self,
        prefix: str,
        headerGuard: Optional[str] = None,
        Properties: Optional["list[Property]"] = None,
        spans: Optional["list[SpanField]"] = None,
    ) -> None:
        """Store the inputs used by ``build``.

        Args:
            prefix: Name prefix of the generated struct and functions.
            headerGuard: Include-guard macro; ``INCLUDE_<PREFIX>_H_`` is used
                when empty or ``None``.
            Properties: State properties (objects with ``ty`` and ``name``).
                Defaults to a new empty list per instance.
            spans: Span fields (objects with ``callbacks``). Defaults to a
                new empty list per instance.
        """
        # Fresh lists per instance: default lists must not be shared.
        self.Properties = [] if Properties is None else Properties
        self.prefix = prefix
        self.headerGuard = headerGuard
        self.spans = [] if spans is None else spans

    def build(self):
        """Return the header file contents as a string.

        Raises:
            Exception: If a property has a type other than ``i8``, ``i16``,
                ``i32``, ``i64`` or ``ptr``.
        """
        prefix = self.prefix
        PREFIX = prefix.upper()
        DEFINE = self.headerGuard if self.headerGuard else f"INCLUDE_{PREFIX}_H_"

        parts = [
            f"#ifndef {DEFINE}\n",
            f"#define {DEFINE}\n",
            "#ifdef __cplusplus\n",
            'extern "C" {\n',
            "#endif\n",
            "\n",
            "#include <stdint.h>\n",
            "\n",
            # Main structure
            f"typedef struct {prefix}_s {prefix}_t;\n",
            f"struct {prefix}_s " + "{\n",
            " int32_t _index;\n",
        ]

        for index, field in enumerate(self.spans):
            parts.append(f" void* _span_pos{index};\n")
            # A callback slot is emitted only when the span field has more
            # than one callback.
            if len(field.callbacks) > 1:
                parts.append(f" void* _span_cb{index};\n")

        parts += [
            " int32_t error;\n",
            " const char* reason;\n",
            " const char* error_pos;\n",
            " void* data;\n",
            " void* _current;\n",
        ]

        for prop in self.Properties:
            ty = self._C_TYPES.get(prop.ty)
            if ty is None:
                raise Exception(f'Unknown state property type: "{prop.ty}"')
            parts.append(f" {ty} {prop.name};\n")

        parts += [
            "};",
            "\n",
            f"int {prefix}_init({prefix}_t* s);\n",
            f"int {prefix}_execute({prefix}_t* s, const char* p, const char* endp);\n",
            "\n",
            "#ifdef __cplusplus\n",
            '} /* extern "C" */\n',
            "#endif\n",
            f"#endif /* {DEFINE} */",
        ]
        return "".join(parts)