llparse 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
llparse/compilator.py ADDED
@@ -0,0 +1,1190 @@
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass, field
3
+ from typing import Generic, Optional, TypeVar
4
+
5
+ from .constants import *
6
+ from .frontend import IWrap, WrappedNode, _frontend
7
+
8
+ # NOTE: Unfortunately you can't just import these classes from different files, as that would trigger a circular import.
9
+ # So this file is a little bit bigger than I hoped for, but it was my only solution. - Vizonex
10
+
11
+
12
@dataclass
class Transform(ABC):
    """Base class for backend transforms that render a C expression.

    Subclasses wrap a frontend transform description and know how to turn an
    input C expression into the transformed C expression.
    """

    # Frontend transform description this backend object was created for.
    ref: _frontend.transform.Transform

    @abstractmethod
    def build(self, ctx: "Compilation", value: str) -> str:
        """Return the C expression that applies this transform to ``value``.

        The result is interpolated into generated C code by callers, so every
        implementation must return a string (the annotation previously said
        ``None``, which contradicted all implementations in this module).
        """
18
+
19
+
20
@dataclass
class ID(Transform):
    """Identity transform: the input expression is used untouched."""

    def build(self, ctx: "Compilation", value: str):
        """Return ``value`` unchanged."""
        expression = value
        return expression
24
+
25
+
26
@dataclass
class ToLowerUnsafe(Transform):
    """Transform that ORs 0x20 into the value without any range check."""

    def build(self, ctx: "Compilation", value: str):
        """Return a C expression that sets bit 0x20 of ``value``."""
        template = "(({})| 0x20)"
        return template.format(value)
30
+
31
+
32
@dataclass
class ToLower(Transform):
    """Transform that lowercases only values in the range 'A'..'Z'."""

    def build(self, ctx: "Compilation", value: str):
        """Return a C ternary that ORs 0x20 into ``value`` for 'A'..'Z' only."""
        template = "(({0}) >= 'A' && ({0}) <= 'Z' ? ({0} | 0x20) : ({0}))"
        return template.format(value)
36
+
37
+
38
@dataclass
class MatchSequence:
    """Generates the C helper that matches a fixed byte sequence.

    One helper is emitted per transform; the helper's C name is derived from
    the name of the transform's frontend reference.
    """

    # Transform applied to every input byte before it is compared.
    transform: Transform

    @staticmethod
    def buildGlobals(out: list[str]):
        """Append the status enum and result struct shared by all helpers."""
        out.extend(
            [
                "enum llparse_match_status_e {",
                f" {SEQUENCE_COMPLETE},",
                f" {SEQUENCE_PAUSE},",
                f" {SEQUENCE_MISMATCH}",
                "};",
                "typedef enum llparse_match_status_e llparse_match_status_t;",
                "",
                "struct llparse_match_s {",
                " llparse_match_status_t status;",
                " const unsigned char* current;",
                "};",
                "typedef struct llparse_match_s llparse_match_t;",
            ]
        )

    def getName(self):
        """Return the C function name of the helper for this transform."""
        return f"llparse_match_sequence_{self.transform.ref.name}"

    def build(self, ctx: "Compilation", out: list[str]):
        """Append the C definition of the matching helper to ``out``.

        The generated function resumes from ``s->_index``, compares the
        transformed input bytes against ``seq`` and reports complete, pause
        (input exhausted) or mismatch through ``llparse_match_t``.
        """
        current = self.transform.build(ctx, "*p")
        out.extend(
            [
                f"static llparse_match_t {self.getName()}(",
                f" {ctx.prefix}_t* s, const unsigned char* p,",
                " const unsigned char* endp,",
                " const unsigned char* seq, uint32_t seq_len) {",
                # Vars
                " uint32_t index;",
                " llparse_match_t res;",
                "",
                " index = s->_index;",
                " for (;p != endp; p++) {",
                " unsigned char current;",
                "",
                f" current = {current};",
                " if (current == seq[index]) {",
                " if (++index == seq_len) {",
                f" res.status = {SEQUENCE_COMPLETE};",
                " goto reset;",
                " }",
                " } else {",
                f" res.status = {SEQUENCE_MISMATCH};",
                " goto reset;",
                " }",
                " }",
                " s->_index = index;",
                f" res.status = {SEQUENCE_PAUSE};",
                " res.current = p;",
                " return res;",
                "reset:",
                " s->_index = 0;",
                " res.current = p;",
                " return res;",
                # A function definition is closed by a bare brace; the former
                # "};" left a stray semicolon at file scope in the C output.
                "}",
            ]
        )
96
+
97
+
98
# Type variable for the frontend code description held by a ``Code`` wrapper;
# constrained to the frontend ``Code`` and ``Field`` classes.
T = TypeVar("T", _frontend.code.Code, _frontend.code.Field)
99
+
100
+
101
class Code(Generic[T]):
    """Backend wrapper around a frontend code description."""

    def __init__(self, ref: T):
        # Frontend object describing the code to emit.
        self.ref = ref

    def build(self, ctx: "Compilation", out: list[str]):
        """Emit nothing; subclasses append their C code to ``out``."""
        return None
107
+
108
+
109
class External(Code[_frontend.code.External]):
    """Code wrapper that emits only a C prototype for a user-provided function."""

    def build(self, ctx: "Compilation", out: list[str]):
        """Append the prototype; "value" signatures get a trailing ``int value``."""
        prototype = [
            f"int {self.ref.name} (",
            f" {ctx.prefix}_t* s, const unsigned char* p,",
        ]
        if self.ref.signature == "value":
            prototype += [" const unsigned char* endp,", " int value);"]
        else:
            prototype += [" const unsigned char* endp);"]
        out.extend(prototype)
118
+
119
+
120
class Field(Code):
    """Code wrapper that emits a C function operating on one state field.

    Subclasses provide the function body through ``doBuild``.
    """

    def __init__(self, ref: _frontend.code.Field):
        self.ref = ref

    def build(self, ctx: "Compilation", out: list[str]):
        """Append the complete C function (signature, body, closing brace)."""
        signature = [
            f"int {self.ref.name} (",
            f" {ctx.prefix}_t* {ctx.stateArg()},",
            f" const unsigned char* {ctx.posArg()},",
        ]
        if self.ref.signature == "value":
            # "value" callbacks additionally receive the matched value.
            signature.append(f" const unsigned char* {ctx.endPosArg()},")
            signature.append(f" int {ctx.matchVar()}) {{")
        else:
            signature.append(f" const unsigned char* {ctx.endPosArg()}) {{")
        out.extend(signature)

        body: list[str] = []
        self.doBuild(ctx, body)
        ctx.indent(out, body, " ")
        out.append("}")

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Hook for subclasses; the base implementation emits no body."""
        return None

    def field(self, ctx: "Compilation"):
        """Return the C expression that accesses this code's state field."""
        state = ctx.stateArg()
        return f"{state}->{self.ref.field}"
145
+
146
+
147
class And(Field):
    """Field code that ANDs a constant into the state field."""

    def __init__(self, ref: _frontend.code.And):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``field &= value;`` followed by ``return 0;``.

        The statement previously lacked its terminating semicolon and the
        generated ``int`` function had no return statement, unlike the
        sibling ``Or``/``Store``/``Update`` generators.
        """
        out.append(f"{self.field(ctx)} &= {self.ref.value};")
        out.append("return 0;")
153
+
154
+
155
class IsEqual(Field):
    """Field code that compares the state field against a constant."""

    def __init__(self, ref: _frontend.code.IsEqual):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append a ``return`` of the equality test."""
        lhs = self.field(ctx)
        rhs = self.ref.value
        out.append(f"return {lhs} == {rhs};")
161
+
162
+
163
class Load(Field):
    """Field code that returns the current value of the state field."""

    def __init__(self, ref: _frontend.code.Load):
        super().__init__(ref)

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``return <field>;``."""
        expression = self.field(ctx)
        out.append(f"return {expression};")
169
+
170
+
171
+ # BIG ONE
172
+
173
+
174
class MulAdd(Field):
    """Field code emitting ``field = field * base + match`` with overflow checks."""

    def __init__(self, ref: _frontend.code.MulAdd):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append the guarded multiply-add body to ``out``.

        The generated C returns 1 on overflow (or when the optional maximum
        is exceeded) and 0 on success.

        Raises:
            AssertionError: the field type has no entry in the signed type
                map (signed mode) or in the selected limits table.
        """
        options = self.ref.options
        ty = ctx.getFieldType(self.ref.field)
        target = self.field(ctx)

        if options.signed:
            targetTy = SIGNED_TYPES.get(ty)
            if not targetTy:
                raise AssertionError(f'Unexpected mulAdd type "{ty}"')
            # Reinterpret the storage as its signed counterpart. The
            # declaration previously lacked its terminating semicolon.
            out.append(f"{targetTy}* field = ({targetTy}*) &{target};")
            target = "(*field)"

        _match = ctx.matchVar()

        limits = SIGNED_LIMITS if options.signed else UNSIGNED_LIMITS
        bounds = limits.get(ty)
        if not bounds:
            raise AssertionError(f'Unexpected mulAdd type "{ty}"')
        _min, _max = bounds

        mulMax = f"{_max} / {options.base}"
        mulMin = f"{_min} / {options.base}"

        out.append("/* Multiplication overflow */")
        out.append(f"if ({target} > {mulMax}) " + "{")
        out.append(" return 1;")
        out.append("}")
        if options.signed:
            out.append(f"if ({target} < {mulMin}) " + "{")
            out.append(" return 1;")
            out.append("}")
        out.append("")

        out.append(f"{target} *= {options.base};")
        out.append("")

        # Guard both directions: a non-negative match can overflow past the
        # maximum, a negative one can underflow below the minimum. Only the
        # first case used to be checked, and its inner `if` was unbraced.
        out.append("/* Addition overflow */")
        out.append(f"if ({_match} >= 0) " + "{")
        out.append(f" if ({target} > {_max} - {_match}) " + "{")
        out.append("  return 1;")
        out.append(" }")
        out.append("} else {")
        out.append(f" if ({target} < {_min} - {_match}) " + "{")
        out.append("  return 1;")
        out.append(" }")
        out.append("}")

        out.append(f"{target} += {_match};")

        if options.max:
            out.append("")
            out.append("/* Enforce maximum */")
            out.append(f"if ({target} > {options.max}) " + "{")
            out.append(" return 1;")
            out.append("}")

        out.append("return 0;")
236
+
237
+
238
class Or(Field):
    """Field code that ORs a constant into the state field."""

    def __init__(self, ref: _frontend.code.Or):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``field |= value;`` and ``return 0;``."""
        out.extend(
            [
                f"{self.field(ctx)} |= {self.ref.value};",
                "return 0;",
            ]
        )
245
+
246
+
247
class Store(Field):
    """Field code that stores the matched value in the state field."""

    def __init__(self, ref: _frontend.code.Store):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``field = match;`` and ``return 0;``."""
        destination = self.field(ctx)
        source = ctx.matchVar()
        out.extend([f"{destination} = {source};", "return 0;"])
254
+
255
+
256
class Test(Field):
    """Field code that checks whether all bits of a mask are set in the field."""

    def __init__(self, ref: _frontend.code.Test):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``return (field & mask) == mask;``."""
        mask = self.ref.value
        expression = self.field(ctx)
        out.append(f"return ({expression} & {mask}) == {mask};")
263
+
264
+
265
class Update(Field):
    """Field code that assigns a constant to the state field."""

    def __init__(self, ref: _frontend.code.Update):
        self.ref = ref

    def doBuild(self, ctx: "Compilation", out: list[str]):
        """Append ``field = value;`` and ``return 0;``."""
        assignment = f"{self.field(ctx)} = {self.ref.value};"
        out.extend([assignment, "return 0;"])
272
+
273
+
274
@dataclass
class INodeEdge:
    """Plain record describing a transition to another node."""

    # Wrapped frontend node the edge leads to.
    node: IWrap[_frontend.node.Node]
    # Whether the edge is taken without advancing the input position.
    noAdvance: bool
    # Optional integer carried along the edge.
    value: Optional[int] = None
279
+
280
+
281
class Node:
    """Base class of backend nodes; each node renders one C state body."""

    def __init__(self, ref: _frontend.node.Node) -> None:
        self.ref = ref
        # State name, set as soon as build() starts (also guards recursion).
        self.cachedDecel: Optional[str] = None
        # Compilation this node was built with; set by build().
        self.privCompilation: Optional["Compilation"] = None

    def build(self, compilation: "Compilation"):
        """Render this node once and return its state name.

        Later calls return the cached name without rendering again.
        """
        cached = self.cachedDecel
        if cached:
            return cached

        node_id = self.ref.id
        state_name = STATE_PREFIX + node_id.name
        # Record the name before descending so cyclic graphs terminate.
        self.cachedDecel = state_name
        self.privCompilation = compilation

        body: list[str] = []
        compilation.debug(
            body,
            f'Entering node \\"{node_id.originalName}\\" (\\"{node_id.name}\\")',
        )
        self.doBuild(body)
        compilation.addState(state_name, body)
        return state_name

    @property
    def compilation(self):
        """Compilation recorded by build(); asserts that build() has run."""
        current = self.privCompilation
        assert current
        return current

    def prologue(self, out: list[str]):
        """Append the check that pauses the state when no input is left."""
        ctx = self.compilation
        out.append(f"if ({ctx.posArg()} == {ctx.endPosArg()}) " + "{")

        pause_lines: list[str] = []
        self.pause(pause_lines)
        self.compilation.indent(out, pause_lines, " ")
        out.append("}")

    def pause(self, out: list[str]):
        """Append the statement returning this node's own state name."""
        out.append(f"return {self.cachedDecel};")

    # The edge is passed as separate arguments (not as an edge object), which
    # the original author found necessary to avoid runaway recursion.
    def tailTo(
        self,
        out: list[str],
        node: IWrap[_frontend.node.Node],
        noAdvance: bool,
        value: Optional[int],
    ):
        """Append the jump to ``node``, building that node if necessary."""
        ctx = self.compilation
        built = ctx.unwrapNode(node).build(ctx)
        label = built if built.startswith(STATE_PREFIX) else STATE_PREFIX + built

        if not noAdvance:
            out.append(f"{ctx.posArg()}++;")
        if isinstance(value, int):
            out.append(f"{ctx.matchVar()} = {value};")
        out.append(f"goto {LABEL_PREFIX}{label};")

    def doBuild(self, out: list[str]):
        """Render the state body; must be provided by subclasses."""
        raise NotImplementedError
361
+
362
+
363
class Consume(Node):
    """Node that skips as many bytes as a state field says."""

    def __init__(self, ref: _frontend.node.Consume) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the skip-or-pause logic driven by the counter field.

        Raises:
            Exception: the counter field is not one of i8/i16/i32/i64.
        """
        ctx = self.compilation
        index = ctx.stateField(self.ref.field)
        ty = ctx.getFieldType(self.ref.field)

        if ty not in ("i64", "i32", "i16", "i8"):
            raise Exception(
                f"Unsupported type {ty} of field {self.ref.field} for consume node"
            )

        out.extend(
            [
                "size_t avail;",
                "size_t need;",
                "",
                f"avail = {ctx.endPosArg()} - {ctx.posArg()};",
                f"need = {index};",
                "if (avail >= need) {",
                " p += need;",
                f" {index} = 0;",
            ]
        )

        otherwise = self.ref.otherwise
        assert otherwise
        jump: list[str] = []
        self.tailTo(jump, otherwise.node, otherwise.noAdvance, otherwise.value)
        ctx.indent(out, jump, " ")
        out.extend(["}", ""])

        # Not enough input: remember how much is still owed and pause.
        out.append(f"{index} -= avail;")
        self.pause(out)
406
+
407
+
408
class Empty(Node):
    """Node that only forwards to its ``otherwise`` edge."""

    def __init__(self, ref: _frontend.node.Empty) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the jump, preceded by an end-of-input check if it advances."""
        edge = self.ref.otherwise
        assert edge
        advances = not edge.noAdvance
        if advances:
            self.prologue(out)
        self.tailTo(out, edge.node, edge.noAdvance, edge.value)
419
+
420
+
421
class Error(Node):
    """Node that records an error code/reason and stops the parser."""

    def __init__(self, ref: _frontend.node.Error) -> None:
        self.ref = ref
        super().__init__(ref)

    def storeError(self, out: list[str]):
        """Append the assignments of error code, reason and error position."""
        ctx = self.compilation

        # hex() already renders negative integers with a leading minus sign
        # (hex(-5) == "-0x5"); prepending another "-" produced "--0x5", which
        # is not a valid C constant expression.
        hexCode = hex(self.ref.code)

        out.append(f"{ctx.errorField()} = {hexCode};")
        out.append(f"{ctx.reasonField()} = {ctx.cstring(self.ref.reason)};")
        out.append(f"{ctx.errorPosField()} = (const char*) {ctx.posArg()};")

    def doBuild(self, out: list[str]):
        """Append the error bookkeeping and the return of the error state."""
        self.storeError(out)
        out.append(
            f"{self.compilation.currentField()} = (void*)(intptr_t) {STATE_ERROR};"
        )
        out.append(f"return {STATE_ERROR};")
444
+
445
+
446
class Invoke(Node):
    """Node that calls a code callback and dispatches on its return value."""

    def __init__(self, ref: _frontend.node.Invoke) -> None:
        self.ref = ref
        super().__init__(ref)

    def fixBadCalls(self):
        """Prefix the name of Store callbacks with ``<prefix>__c_`` if missing."""
        ctx = self.compilation
        target = self.ref.code.ref
        if not isinstance(target, _frontend.code.Store):
            return
        if not target.name.startswith(ctx.prefix + "__c_"):
            target.name = ctx.prefix + "__c_" + target.name

    def doBuild(self, out: list[str]):
        """Append a ``switch`` on the callback result with one case per edge."""
        ctx = self.compilation
        self.fixBadCalls()

        code = ctx.unwrapCode(self.ref.code)
        # Without a code object there is nothing to call; emit nothing.
        if not code:
            return None

        codeDecl = ctx.buildCode(code)

        args = [ctx.stateArg(), ctx.posArg(), ctx.endPosArg()]
        if code.ref.signature == "value":
            args.append(ctx.matchVar())
        call = f"{codeDecl}({', '.join(args)})"

        out.append(f"switch ({call}) " + "{")
        for edge in self.ref.edges():
            out.append(f" case {edge.code}:")
            jump: list[str] = []
            self.tailTo(jump, node=edge.node, noAdvance=True, value=None)
            ctx.indent(out, jump, " ")

        out.append(" default:")
        fallback: list[str] = []
        otherwise = self.ref.otherwise
        self.tailTo(fallback, otherwise.node, otherwise.noAdvance, None)
        ctx.indent(out, fallback, " ")
        out.append("}")
489
+
490
+
491
class Pause(Error):
    """Error-like node that stops the parser but records where to resume."""

    def __init__(self, ref: _frontend.node.Pause) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the error bookkeeping and store the resumption state."""
        ctx = self.compilation
        self.storeError(out)

        assert self.ref.otherwise
        # The generated C needs the *state name* of the resumption target, so
        # the target must be built; interpolating the unwrapped Node object
        # itself would emit its Python repr into the C source.
        otherwise = ctx.unwrapNode(self.ref.otherwise.node).build(ctx)
        out.append(f"{ctx.currentField()} = (void*) (intptr_t) {otherwise};")
        out.append(f"return {STATE_ERROR};")
504
+
505
+
506
class Sequence(Node):
    """Node that matches a multi-byte sequence through the shared C helper."""

    def __init__(self, ref: _frontend.node.Sequence) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the helper call and the switch on its match status."""
        ctx = self.compilation

        out.append("llparse_match_t match_seq;")
        out.append("")

        self.prologue(out)

        matchSequence = ctx.getMatchSequence(self.ref.transform)

        # Blobs are byte strings. A text sequence must be *encoded* (str has
        # no decode()), and the length passed to C must be the byte length.
        select = self.ref.select
        if isinstance(select, str):
            select = select.encode("utf-8")

        out.append(
            f"match_seq = {matchSequence}({ctx.stateArg()}, "
            + f"{ctx.posArg()},"
            + f"{ctx.endPosArg()}, {ctx.blob(select)}, "
            + f"{len(select)});"
        )
        out.append("p = match_seq.current;")

        out.append("switch (match_seq.status) {")

        out.append(f" case {SEQUENCE_COMPLETE}: " + "{")
        tmp: list[str] = []
        self.tailTo(
            tmp, noAdvance=False, node=self.ref.Edge.node, value=self.ref.Edge.value
        )
        ctx.indent(out, tmp, " ")
        out.append(" }")

        out.append(f" case {SEQUENCE_PAUSE}: " + "{")
        tmp = []
        self.pause(tmp)
        ctx.indent(out, tmp, " ")
        out.append(" }")

        out.append(f" case {SEQUENCE_MISMATCH}: " + "{")
        tmp = []
        # Pass the edge fields explicitly rather than splatting __dict__,
        # which breaks as soon as the edge object gains another attribute.
        otherwise = self.ref.otherwise
        self.tailTo(tmp, otherwise.node, otherwise.noAdvance, otherwise.value)
        ctx.indent(out, tmp, " ")
        out.append(" }")

        out.append("}")
550
+
551
+
552
class Single(Node):
    """Node that dispatches on a single (transformed) input byte."""

    def __init__(self, ref: _frontend.node.Single) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append a ``switch`` with one case per edge key plus a default."""
        ctx = self.compilation
        otherwise = self.ref.otherwise
        assert otherwise

        self.prologue(out)
        transform = ctx.unwrapTransform(self.ref.transform)
        current = transform.build(ctx, f"*{ctx.posArg()}")
        out.append(f"switch ({current})" + "{")

        for edge in self.ref.edges:
            key = edge.key
            # Printable characters other than quote and backslash are written
            # as C character literals; everything else as its numeric value.
            printable = 0x20 <= key <= 0x7E and key != 0x27 and key != 0x5C
            label = f"'{chr(key)}'" if printable else key

            out.append(f" case {label}:" + "{")
            jump: list[str] = []
            self.tailTo(jump, edge.node, edge.noAdvance, edge.value)
            ctx.indent(out, jump, " ")
            out.append(" }")

        out.append(" default: {")
        fallback: list[str] = []
        self.tailTo(fallback, otherwise.node, otherwise.noAdvance, None)
        ctx.indent(out, fallback, " ")
        out.append(" }")
        out.append("}")
590
+
591
+
592
class SpanStart(Node):
    """Node that records the start position (and callback) of a span."""

    def __init__(self, ref: _frontend.node.SpanStart) -> None:
        # Same attributes as Node.__init__ sets: ref plus the two caches.
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the stores of span position/callback and the onward jump."""
        self.prologue(out)

        ctx = self.compilation
        span = self.ref.field

        out.append(f"{ctx.spanPosField(span.index)} = (void*) {ctx.posArg()};")

        # The callback is stored only when the span has more than one.
        if len(span.callbacks) > 1:
            cbField = ctx.spanCbField(span.index)
            callback = ctx.unwrapCode(self.ref.callback, True)
            out.append(f"{cbField} = {ctx.buildCode(callback)};")

        edge = self.ref.otherwise
        self.tailTo(out, edge.node, edge.noAdvance, edge.value)
615
+
616
+
617
class SpanEnd(Node):
    """Node that closes a span and invokes its callback."""

    def __init__(self, ref: _frontend.node.SpanEnd) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the callback invocation, its error path and the onward jump."""
        out.extend(["const unsigned char* start;", "int err;", ""])

        ctx = self.compilation
        posField = ctx.spanPosField(self.ref.field.index)

        # Load the start position, then reset the stored one.
        out.append(f"start = {posField};")
        out.append(f"{posField} = NULL;")

        # Invoke the span callback with the [start, pos) range.
        callback = ctx.buildCode(ctx.unwrapCode(self.ref.callback, True))
        out.append(f"err = {callback}({ctx.stateArg()}, start,{ctx.posArg()});")

        out.append("if (err != 0) {")
        failure: list[str] = []
        self.buildError(failure, "err")
        ctx.indent(out, failure, " ")
        out.append("}")

        edge = self.ref.otherwise
        self.tailTo(out, edge.node, edge.noAdvance, None)

    def buildError(self, out: list[str], code: str):
        """Append the error path: store the code, resume position and state."""
        ctx = self.compilation

        out.append(f"{ctx.errorField()} = {code};")

        otherwise = self.ref.otherwise
        assert otherwise

        # When the edge advances, resumption starts one byte further on.
        if otherwise.noAdvance:
            resumePos = ctx.posArg()
        else:
            resumePos = f"({ctx.posArg()} + 1)"
        out.append(f"{ctx.errorPosField()} = (const char*) {resumePos};")

        # Building returns the state name (cached if already built).
        resumptionTarget = ctx.unwrapNode(otherwise.node).build(ctx)
        if not resumptionTarget.startswith(STATE_PREFIX):
            resumptionTarget = STATE_PREFIX + resumptionTarget

        out.append(
            f"{ctx.currentField()} = " + f"(void*) (intptr_t) {resumptionTarget};"
        )
        out.append(f"return {STATE_ERROR};")
675
+
676
+
677
# Largest byte value; the lookup table has MAX_CHAR + 1 entries.
MAX_CHAR = 0xFF
# Number of table entries written per generated source line.
TABLE_GROUP = 16

# _mm_cmpestri takes 8 ranges; each range is a (first, last) pair of bytes,
# so one call consumes up to 16 entries of the flat range list.
SSE_RANGES_LEN = 16

# _mm_cmpestri takes 128bit input, so range blobs are zero-padded to 16 bytes.
SSE_RANGES_PAD = 16
# Upper bound on the number of _mm_cmpestri calls emitted for one node.
MAX_SSE_CALLS = 2
# Alignment requested for the range blobs.
SSE_ALIGNMENT = 16
687
+
688
+
689
@dataclass
class ITable:
    """Name of a generated C lookup table plus the lines declaring it."""

    # C identifier of the table.
    name: str
    # Source lines of the declaration; each instance gets its own list.
    declaration: list[str] = field(default_factory=list)
693
+
694
+
695
class TableLookup(Node):
    """Node that dispatches through a 256-entry byte lookup table.

    When the node has a single self-looping edge and no transform (or the
    "id" transform), an SSE4.2 fast path is emitted in front of the table.
    """

    def __init__(self, ref: _frontend.node.TableLookup) -> None:
        self.ref = ref
        super().__init__(ref)

    def doBuild(self, out: list[str]):
        """Append the table, the optional SSE fast path and the dispatch."""
        ctx = self.compilation

        table = self.buildTable()
        out.extend(table.declaration)

        self.prologue(out)

        transform = ctx.unwrapTransform(self.ref.transform)

        self.buildSSE(out)

        current = transform.build(ctx, f"*{ctx.posArg()}")

        out.append(f"switch ({table.name}[(uint8_t) {current}]) " + "{")
        # Table entries hold the 1-based edge index; 0 selects the default.
        for index, edge in enumerate(self.ref.privEdges, 1):
            out.append(f" case {index}: " + "{")
            tmp: list[str] = []
            self.tailTo(tmp, noAdvance=edge.noAdvance, node=edge.node, value=None)
            ctx.indent(out, tmp, " ")
            out.append(" }")

        out.append(" default: {")
        otherwise = self.ref.otherwise
        tmp = []
        # Explicit fields instead of splatting __dict__, which breaks as soon
        # as the edge object carries any additional attribute.
        self.tailTo(tmp, otherwise.node, otherwise.noAdvance, otherwise.value)
        ctx.indent(out, tmp, " ")
        out.append(" }")
        out.append("}")

    def buildSSE(self, out: list[str]) -> bool:
        """Append the SSE4.2 fast path if applicable.

        Returns:
            True when the fast path was emitted, False when the node does not
            qualify (transform other than "id", not exactly one edge, edge
            not looping back to this node, or too many ranges).
        """
        ctx = self.compilation

        if self.ref.transform and self.ref.transform.ref.name != "id":
            return False

        if len(self.ref.privEdges) != 1:
            return False

        edge = self.ref.privEdges[0]

        if edge.node.ref != self.ref:
            return False

        # Collapse the keys into a flat [first, last, first, last, ...] list.
        # NOTE(review): this assumes edge.keys is in ascending order - confirm.
        ranges: list[int] = []
        first: Optional[int] = None
        last: Optional[int] = None

        for key in edge.keys:
            # Compare against None: key 0 is a valid byte but is falsy.
            if first is None:
                first = key
            if last is None:
                last = key

            if key - last > 1:
                ranges.extend([first, last])
                first = key
            last = key

        if first is not None and last is not None:
            ranges.extend([first, last])

        # Reduce call load: compare the *number* of range entries (the list
        # itself cannot be compared with an int).
        if len(ranges) > MAX_SSE_CALLS * SSE_RANGES_LEN:
            return False

        out.append("#ifdef __SSE4_2__")
        out.append(f"if ({ctx.endPosArg()} - {ctx.posArg()} >= 16) " + "{")
        out.append(" __m128i ranges;")
        out.append(" __m128i input;")
        out.append(" int avail;")
        out.append(" int match_len;")
        out.append("")
        out.append(" /* Load input */")
        out.append(f" input = _mm_loadu_si128((__m128i const*) {ctx.posArg()});")

        for off in range(0, len(ranges), SSE_RANGES_LEN):
            subRanges = ranges[off : off + SSE_RANGES_LEN]
            padding = [0] * (SSE_RANGES_PAD - len(subRanges))
            blob = ctx.blob(bytes(subRanges + padding), SSE_ALIGNMENT)

            out.append(f" ranges = _mm_loadu_si128((__m128i const*) {blob});")
            out.append(" /* Find first character that does not match 'ranges' */")
            out.append(f" match_len = _mm_cmpestri(ranges, {len(subRanges)},")
            out.append("  input, 16,")
            out.append("  _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES |")
            out.append("  _SIDD_NEGATIVE_POLARITY);")
            out.append("")
            out.append(" if (match_len != 0) {")
            out.append(f"  {ctx.posArg()} += match_len;")

            tmp: list[str] = []
            assert not edge.noAdvance
            # The position was already advanced by match_len above.
            self.tailTo(tmp, edge.node, True, None)
            ctx.indent(out, tmp, " ")
            out.append(" }")

        tmp = []
        otherwise = self.ref.otherwise
        assert otherwise
        self.tailTo(tmp, otherwise.node, otherwise.noAdvance, otherwise.value)
        ctx.indent(out, tmp, " ")
        out.append("}")
        out.append("#endif /* __SSE4_2__ */")

        return True

    def buildTable(self):
        """Return the ITable declaring the byte-to-edge-index lookup table."""
        table: list[int] = [0] * (MAX_CHAR + 1)
        for index, edge in enumerate(self.ref.privEdges, 1):
            for key in edge.keys:
                # Every byte may be claimed by at most one edge.
                assert table[key] == 0
                table[key] = index

        lines = ["static uint8_t lookup_table[] = {"]
        for start in range(0, len(table), TABLE_GROUP):
            row = ", ".join(str(entry) for entry in table[start : start + TABLE_GROUP])
            line = f" {row}"
            if start + TABLE_GROUP < len(table):
                line += ","
            lines.append(line)
        lines.append("};")

        return ITable(name="lookup_table", declaration=lines)
833
+
834
+
835
# Number of blob bytes written per generated source line.
BLOB_GROUP_SIZE = 11
836
+
837
+ from .pybuilder import Property
838
+
839
+
840
@dataclass
class ICompilerOptions:
    """Optional settings of a compilation."""

    # Name of the C debug function to call; no debug calls when unset.
    debug: Optional[str] = None
    # Optional header setting (not read in this part of the file).
    header: Optional[str] = None
844
+
845
+
846
@dataclass
class IBlob:
    """A named constant byte buffer emitted into the generated C source."""

    # Raw bytes of the blob.
    buffer: bytes
    # C identifier under which the blob is declared.
    name: str
    # Requested alignment; when set the blob is wrapped in an SSE4.2 guard.
    alignment: Optional[int] = None
851
+
852
+
853
+ class Compilation:
854
def __init__(
    self,
    prefix: str,
    properites: list[Property],
    resumptionsTargets: list[WrappedNode],
    options: ICompilerOptions,
) -> None:
    """Set up empty registries and record the names of resumption states."""
    self.prefix = prefix
    self.properties = properites
    self.options = options

    # State names (with prefix) of the nodes the parser may resume at.
    self.resumptionTargets: set[str] = {
        STATE_PREFIX + target.ref.id.name for target in resumptionsTargets
    }

    # Containers are used to prevent recursions
    self.CodeContainer: dict[IWrap[_frontend.code.Code], Code] = {}
    self.NodeContainer: dict[IWrap[_frontend.node.Node], Node] = {}

    self.codeMap: dict[str, Code] = {}
    self.stateDict: dict[str, list[str]] = {}
    self.blobs: dict[bytes, IBlob] = {}
    self.matchSequence: dict[str, MatchSequence] = {}
879
+
880
def buildStateEnum(self, out: list[str]):
    """Append the C enum listing the error state and every registered state."""
    # TODO(Vizonex): allow other enum names, in case several generated
    # parsers (e.g. llhttp plus another source) are compiled together.
    members = [f" {STATE_ERROR},"]
    # Every registered state is listed, not only the resumption targets.
    members.extend(f" {stateName}," for stateName in self.stateDict)

    out.append("enum llparse_state_e {")
    out.extend(members)
    out.append("};")
    out.append("typedef enum llparse_state_e llparse_state_t;")
892
+
893
def buildBlobs(self, out: list[str]):
    """Append the C array declarations of all registered blobs."""
    if not self.blobs:
        return

    for blob in self.blobs.values():
        data = blob.buffer
        aligned = bool(blob.alignment)

        # Aligned blobs are wrapped in an SSE4.2 guard.
        align = f" ALIGN({blob.alignment})" if aligned else ""
        if aligned:
            out.append("#ifdef __SSE4_2__")

        out.append(f"static const unsigned char {align} {blob.name}[] = " + "{")

        total = len(data)
        for start in range(0, total, BLOB_GROUP_SIZE):
            stop = min(total, start + BLOB_GROUP_SIZE)
            cells: list[str] = []
            for byte in data[start:stop]:
                if byte in (0x27, 0x5C):
                    # Quote and backslash need escaping inside a C literal.
                    cells.append("'\\" + chr(byte) + "'")
                elif 0x20 <= byte <= 0x7E:
                    cells.append("'" + chr(byte) + "'")
                else:
                    cells.append(hex(byte))

            row = " " + ", ".join(cells)
            if stop != total:
                row += ","
            out.append(row)

        out.append("};")

        if aligned:
            out.append("#endif /* __SSE4_2__ */")

    out.append("")
940
+
941
def buildMatchSequence(self, out: list[str]):
    """Append the shared match types and one helper per used transform."""
    helpers = self.matchSequence
    if not helpers:
        return

    MatchSequence.buildGlobals(out)
    for helper in helpers.values():
        helper.build(self, out)
        out.append("")
949
+
950
def reserveSpans(self, spans: "list[_frontend.node.SpanField]"):
    """Register the code of every callback of every given span."""
    callbacks = (callback for span in spans for callback in span.callbacks)
    for callback in callbacks:
        wrapped = self.unwrapCode(callback)
        if not wrapped:
            continue
        self.buildCode(wrapped)
956
+
957
def debug(self, out: list[str], message: str):
    """Append a call to the configured debug function; no-op if none is set."""
    hook = self.options.debug
    if not hook:
        return

    arguments = ", ".join(
        [
            self.stateArg(),
            f"(const char*) {self.posArg()}",
            f"(const char*) {self.endPosArg()}",
        ]
    )
    out.append(f"{hook} ({arguments},")
    out.append(f" {self.cstring(message)});")
969
+
970
def buildGlobals(self, out: list[str]):
    """Append all file-scope C declarations to ``out``.

    Order: debug prototype (if configured), blobs, match helpers, state
    enum, then whatever ``fix_and_build`` contributes.
    """
    hook = self.options.debug
    if hook:
        out.extend(
            [
                f"void {hook}(",
                f" {self.prefix}_t* s, const char* p, const char* endp,",
                " const char* msg);",
            ]
        )

    for emit in (self.buildBlobs, self.buildMatchSequence, self.buildStateEnum):
        emit(out)

    fix_and_build(self, out)
981
+
982
def buildResumptionStates(self, out: list[str]):
    """Append the labelled ``case`` block of every resumption state."""
    resumable = (
        (name, lines)
        for name, lines in self.stateDict.items()
        if name in self.resumptionTargets
    )
    for name, lines in resumable:
        out.append(f"case {name}:")
        out.append(f"{LABEL_PREFIX}{name} : " + "{")
        out.extend(f" {line}" for line in lines)
        # Every state body ends in a jump or return; guard against falling out.
        out.append(" /* UNREACHABLE */;")
        out.append(" abort();")
        out.append("}")
994
+
995
def buildInternalStates(self, out: list[str]):
    """Append the labelled block of every state that is not a resumption target."""
    internal = (
        (name, lines)
        for name, lines in self.stateDict.items()
        if name not in self.resumptionTargets
    )
    for name, lines in internal:
        out.append(f"{LABEL_PREFIX}{name}: " + "{")
        out.extend(f" {line}" for line in lines)
        # Every state body ends in a jump or return; guard against falling out.
        out.append(" /* UNREACHABLE */;")
        out.append(" abort();")
        out.append("}")
1006
+
1007
def addState(self, state: str, lines: list[str]):
    """Register the body lines of ``state``; asserts no non-empty body exists yet."""
    existing = self.stateDict.get(state)
    assert not existing
    self.stateDict[state] = lines
1010
+
1011
def buildCode(self, code: "Code") -> str:
    """Register ``code`` under its name and return that name.

    Raises:
        AssertionError: a different code object (by attribute dict) is
            already registered under the same name.
    """
    name = code.ref.name
    known = self.codeMap.get(name)
    if not known:
        self.codeMap[name] = code
    elif known.__dict__ != code.__dict__:
        raise AssertionError(
            f'Code name conflict for "{name}" {known.__dict__} != {code.__dict__}'
        )
    return name
1021
+
1022
def getFieldType(self, field: str):
    """Return the type of the first property named ``field``.

    Raises:
        LookupError: no property has that name.
    """
    candidates = (prop.ty for prop in self.properties if prop.name == field)
    for ty in candidates:
        return ty
    raise LookupError(f'Field "{field}" not found')
1029
+
1030
+ # Helpers are different since in python we have duck typing - Vizonex
1031
def unwrapCode(
    self, code: IWrap[_frontend.code.Code], allow_continue: bool = False
):
    """Return the backend wrapper for a wrapped frontend code object.

    Wrappers are cached per ``code`` so the same backend object is returned
    on every call. ``allow_continue`` is accepted but not used here.

    Raises:
        Exception: the frontend object is of no known code type.
    """
    cached = self.CodeContainer.get(code)
    if cached:
        return cached

    ref = code.ref

    # Checked in this order; the first matching frontend type wins.
    wrappers = (
        (_frontend.code.And, And),
        (_frontend.code.IsEqual, IsEqual),
        (_frontend.code.Load, Load),
        (_frontend.code.MulAdd, MulAdd),
        (_frontend.code.Or, Or),
        (_frontend.code.External, External),
        (_frontend.code.Store, Store),
        (_frontend.code.Test, Test),
        (_frontend.code.Update, Update),
    )
    for frontend_type, backend_type in wrappers:
        if isinstance(ref, frontend_type):
            wrapped = backend_type(ref)
            break
    else:
        raise Exception(
            f'refrence "{ref.name}" is an Invalid Code Type , TypeName:"{ref.__class__.__name__}"'
        )

    self.CodeContainer[code] = wrapped
    return wrapped
1070
+
1071
def unwrapNode(self, node: IWrap[_frontend.node.Node]):
    """Return the compiler-side wrapper for a frontend node.

    Wrappers are memoised in ``self.NodeContainer`` keyed by ``node``.

    Raises:
        TypeError: If ``node.ref`` is not one of the supported node kinds.
    """
    cached = self.NodeContainer.get(node)
    if cached:
        return cached

    ref = node.ref
    kinds = _frontend.node

    # Checked in order; the first matching frontend type wins.
    wrappers = (
        (kinds.Consume, Consume),
        (kinds.Empty, Empty),
        (kinds.Error, Error),
        (kinds.Invoke, Invoke),
        (kinds.Pause, Pause),
        (kinds.SpanStart, SpanStart),
        (kinds.SpanEnd, SpanEnd),
        (kinds.Single, Single),
        (kinds.Sequence, Sequence),
        (kinds.TableLookup, TableLookup),
    )
    for frontend_type, wrapper_type in wrappers:
        if isinstance(ref, frontend_type):
            built = wrapper_type(ref)
            break
    else:
        raise TypeError(
            f'refrence "{ref}" is an Invalid Code Type , TypeName:"{ref.__class__.__name__}"'
        )

    self.NodeContainer[node] = built
    return built
1107
+
1108
def unwrapTransform(self, node: IWrap[_frontend.transform.Transform]):
    """Return the compiler-side wrapper for a frontend transform.

    A new wrapper is created on every call; nothing is cached here.

    Raises:
        TypeError: If ``node.ref`` is not ``ID``, ``ToLower`` or
            ``ToLowerUnsafe``.
    """
    ref = node.ref
    if isinstance(ref, _frontend.transform.ID):
        return ID(ref)
    if isinstance(ref, _frontend.transform.ToLower):
        return ToLower(ref)
    if isinstance(ref, _frontend.transform.ToLowerUnsafe):
        return ToLowerUnsafe(ref)

    # ``ref`` may be an arbitrary object without ``name``; fall back to the
    # object itself so the TypeError is not masked by an AttributeError.
    label = getattr(ref, "name", ref)
    raise TypeError(
        f'reference "{label}" is an Invalid Transform Type , TypeName:"{ref.__class__.__name__}"'
    )
1120
+
1121
def indent(self, out: list[str], lines: list[str], pad: str):
    """Append every entry of ``lines`` to ``out``, prefixed with ``pad``."""
    out.extend(f"{pad}{line}" for line in lines)
1124
+
1125
def getMatchSequence(self, transform: IWrap[_frontend.transform.Transform]):
    """Return the name of the ``MatchSequence`` helper for ``transform``.

    Helpers are stored in ``self.matchSequence`` keyed by the transform's
    reference name, so each distinct name gets one ``MatchSequence``.
    """
    wrapped: Transform = self.unwrapTransform(transform)
    key = wrapped.ref.name

    sequence = self.matchSequence.get(key)
    if not sequence:
        sequence = MatchSequence(wrapped)
        self.matchSequence[key] = sequence
    return sequence.getName()
1134
+
1135
def stateArg(self):
    """Return the ``ARG_STATE`` constant.

    ``stateField`` uses this value as the left-hand side of its ``->``
    accessor expressions.
    """
    return ARG_STATE
1137
+
1138
def posArg(self):
    """Return the ``ARG_POS`` constant.

    Presumably the name of the current-position argument in the generated
    code - confirm against ``constants``.
    """
    return ARG_POS
1140
+
1141
def endPosArg(self):
    """Return the ``ARG_ENDPOS`` constant.

    Presumably the name of the end-of-input argument in the generated
    code - confirm against ``constants``.
    """
    return ARG_ENDPOS
1143
+
1144
def matchVar(self):
    """Return the ``VAR_MATCH`` constant.

    Presumably the name of the local match-result variable in the
    generated code - confirm against ``constants``.
    """
    return VAR_MATCH
1146
+
1147
def indexField(self):
    """Return the ``stateField`` accessor expression for ``_index``."""
    field_name = "_index"
    return self.stateField(field_name)
1149
+
1150
def currentField(self):
    """Return the ``stateField`` accessor expression for ``_current``."""
    field_name = "_current"
    return self.stateField(field_name)
1152
+
1153
def errorField(self):
    """Return the ``stateField`` accessor expression for ``error``."""
    field_name = "error"
    return self.stateField(field_name)
1155
+
1156
def reasonField(self):
    """Return the ``stateField`` accessor expression for ``reason``."""
    field_name = "reason"
    return self.stateField(field_name)
1158
+
1159
def errorPosField(self):
    """Return the ``stateField`` accessor expression for ``error_pos``."""
    field_name = "error_pos"
    return self.stateField(field_name)
1161
+
1162
def spanPosField(self, index: int):
    """Return the ``stateField`` accessor for the ``_span_pos<index>`` slot."""
    field_name = f"_span_pos{index}"
    return self.stateField(field_name)
1164
+
1165
def spanCbField(self, index: int):
    """Return the ``stateField`` accessor for the ``_span_cb<index>`` slot."""
    field_name = f"_span_cb{index}"
    return self.stateField(field_name)
1167
+
1168
def stateField(self, name: str):
    """Return ``<stateArg>-><name>``: a pointer-member access on the state argument."""
    state = self.stateArg()
    return f"{state}->{name}"
1170
+
1171
+ # Globals
1172
+
1173
def cstring(self, value: str):
    """Return ``value`` as a double-quoted, escaped string literal.

    Quotes, backslashes and control characters inside ``value`` are
    escaped so the result stays a single well-formed literal.  Plain
    text comes back exactly as before: ``abc`` becomes ``"abc"``.
    """
    # Local import keeps this method self-contained without touching the
    # module's import block.
    import json

    # JSON string escaping covers the characters that would otherwise end
    # the literal early (``"``, ``\``, newlines).  ``ensure_ascii=False``
    # leaves non-ASCII text untouched, as the old implementation did.
    return json.dumps(value, ensure_ascii=False)
1175
+
1176
def blob(self, value: bytes, alignment: Optional[int] = None):
    """Return the generated name for the byte blob ``value``.

    Blobs are stored in ``self.blobs`` keyed by their bytes.  A blob seen
    before returns its existing name, and ``alignment`` is ignored in that
    case.  A new blob is named ``BLOB_PREFIX`` followed by the number of
    blobs registered so far.
    """
    known = self.blobs.get(value)
    if known:
        return known.name

    name = BLOB_PREFIX + str(len(self.blobs))
    self.blobs[value] = IBlob(value, name, alignment)
    return name
1184
+
1185
+
1186
def fix_and_build(ctx: Compilation, out: list[str]):
    """Emit every code object registered in ``ctx.codeMap`` into ``out``.

    An empty-string entry is appended before each one, then its own
    ``build`` method writes its lines.
    """
    for helper in ctx.codeMap.values():
        out.append("")
        helper.build(ctx, out)