llparse 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {llparse-0.1.1 → llparse-0.1.2}/PKG-INFO +1 -1
  2. {llparse-0.1.1 → llparse-0.1.2}/llparse/C_compiler.py +28 -30
  3. {llparse-0.1.1 → llparse-0.1.2}/llparse/compilator.py +4 -8
  4. {llparse-0.1.1 → llparse-0.1.2}/llparse/cython_builder.py +1 -0
  5. {llparse-0.1.1 → llparse-0.1.2}/llparse/dot.py +0 -3
  6. llparse-0.1.2/llparse/errors.py +2 -0
  7. {llparse-0.1.1 → llparse-0.1.2}/llparse/frontend.py +1 -3
  8. {llparse-0.1.1 → llparse-0.1.2}/llparse/llparse.py +7 -2
  9. {llparse-0.1.1 → llparse-0.1.2}/llparse/pybuilder/loopchecker.py +80 -39
  10. {llparse-0.1.1 → llparse-0.1.2}/llparse/pybuilder/main_code.py +27 -10
  11. {llparse-0.1.1 → llparse-0.1.2}/llparse/pybuilder/parsemap.py +2 -6
  12. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/front.py +3 -1
  13. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/nodes.py +3 -1
  14. {llparse-0.1.1 → llparse-0.1.2}/llparse/settings.py +0 -1
  15. {llparse-0.1.1 → llparse-0.1.2}/llparse/spanalloc.py +12 -27
  16. {llparse-0.1.1 → llparse-0.1.2}/llparse/test.py +1 -0
  17. {llparse-0.1.1 → llparse-0.1.2}/llparse/trie.py +0 -13
  18. {llparse-0.1.1 → llparse-0.1.2}/llparse.egg-info/PKG-INFO +1 -1
  19. {llparse-0.1.1 → llparse-0.1.2}/llparse.egg-info/SOURCES.txt +4 -2
  20. {llparse-0.1.1 → llparse-0.1.2}/pyproject.toml +1 -1
  21. llparse-0.1.2/tests/test_loop_checker.py +160 -0
  22. llparse-0.1.2/tests/test_span_allocator.py +121 -0
  23. llparse-0.1.1/llparse/tire.py +0 -158
  24. {llparse-0.1.1 → llparse-0.1.2}/LICENSE +0 -0
  25. {llparse-0.1.1 → llparse-0.1.2}/README.md +0 -0
  26. {llparse-0.1.1 → llparse-0.1.2}/llparse/__init__.py +0 -0
  27. {llparse-0.1.1 → llparse-0.1.2}/llparse/constants.py +0 -0
  28. {llparse-0.1.1 → llparse-0.1.2}/llparse/debug.py +0 -0
  29. {llparse-0.1.1 → llparse-0.1.2}/llparse/enumerator.py +0 -0
  30. {llparse-0.1.1 → llparse-0.1.2}/llparse/header.py +0 -0
  31. {llparse-0.1.1 → llparse-0.1.2}/llparse/pybuilder/__init__.py +0 -0
  32. {llparse-0.1.1 → llparse-0.1.2}/llparse/pybuilder/builder.py +0 -0
  33. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/containers.py +0 -0
  34. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/implementation.py +0 -0
  35. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/namespace.py +2 -2
  36. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/peephole.py +0 -0
  37. {llparse-0.1.1 → llparse-0.1.2}/llparse/pyfront/transform.py +0 -0
  38. {llparse-0.1.1 → llparse-0.1.2}/llparse.egg-info/dependency_links.txt +0 -0
  39. {llparse-0.1.1 → llparse-0.1.2}/llparse.egg-info/top_level.txt +0 -0
  40. {llparse-0.1.1 → llparse-0.1.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llparse
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A Parody of llparse written for writing C Parsers with Python
5
5
  Author-email: Vizonex <VizonexBusiness@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -29,32 +29,32 @@ class CCompiler:
29
29
  out.append("#include <string.h>")
30
30
  out.append("")
31
31
  # Seems LLParse was updated from /* UNREACHABLE */ abort(); to a Macro, Intresting...
32
- out.append('#ifdef __SSE4_2__')
33
- out.append(' #ifdef _MSC_VER')
34
- out.append(' #include <nmmintrin.h>')
35
- out.append(' #else /* !_MSC_VER */')
36
- out.append(' #include <x86intrin.h>')
37
- out.append(' #endif /* _MSC_VER */')
38
- out.append('#endif /* __SSE4_2__ */')
39
- out.append('')
40
-
41
- out.append('#ifdef __ARM_NEON__')
42
- out.append(' #include <arm_neon.h>')
43
- out.append('#endif /* __ARM_NEON__ */')
44
- out.append('')
45
-
46
- out.append('#ifdef __wasm__')
47
- out.append(' #include <wasm_simd128.h>')
48
- out.append('#endif /* __wasm__ */')
49
- out.append('')
50
-
51
- out.append('#ifdef _MSC_VER')
52
- out.append(' #define ALIGN(n) _declspec(align(n))')
53
- out.append(' #define UNREACHABLE __assume(0)')
54
- out.append('#else /* !_MSC_VER */')
55
- out.append(' #define ALIGN(n) __attribute__((aligned(n)))')
56
- out.append(' #define UNREACHABLE __builtin_unreachable()')
57
- out.append('#endif /* _MSC_VER */')
32
+ out.append("#ifdef __SSE4_2__")
33
+ out.append(" #ifdef _MSC_VER")
34
+ out.append(" #include <nmmintrin.h>")
35
+ out.append(" #else /* !_MSC_VER */")
36
+ out.append(" #include <x86intrin.h>")
37
+ out.append(" #endif /* _MSC_VER */")
38
+ out.append("#endif /* __SSE4_2__ */")
39
+ out.append("")
40
+
41
+ out.append("#ifdef __ARM_NEON__")
42
+ out.append(" #include <arm_neon.h>")
43
+ out.append("#endif /* __ARM_NEON__ */")
44
+ out.append("")
45
+
46
+ out.append("#ifdef __wasm__")
47
+ out.append(" #include <wasm_simd128.h>")
48
+ out.append("#endif /* __wasm__ */")
49
+ out.append("")
50
+
51
+ out.append("#ifdef _MSC_VER")
52
+ out.append(" #define ALIGN(n) _declspec(align(n))")
53
+ out.append(" #define UNREACHABLE __assume(0)")
54
+ out.append("#else /* !_MSC_VER */")
55
+ out.append(" #define ALIGN(n) __attribute__((aligned(n)))")
56
+ out.append(" #define UNREACHABLE __builtin_unreachable()")
57
+ out.append("#endif /* _MSC_VER */")
58
58
 
59
59
  out.append("")
60
60
  out.append(
@@ -83,7 +83,7 @@ class CCompiler:
83
83
  out.append("}")
84
84
  out.append("")
85
85
 
86
- # TODO (Vizonex) Make llparse_state_t's Name Optional and alterable incase mixed with
86
+ # TODO (Vizonex) Make llparse_state_t's Name Optional and alterable incase mixed with
87
87
  # llhttp or another parser
88
88
  out.append(f"static llparse_state_t {info.prefix}__run(")
89
89
  out.append(f" {info.prefix}_t* {ARG_STATE},")
@@ -182,9 +182,7 @@ class CCompiler:
182
182
  else:
183
183
  # TODO (Vizonex) Merge lines 139 & 140 together in a future update
184
184
  callback = (
185
- f"(({info.prefix}__span_cb)"
186
- + ctx.spanCbField(span.index)
187
- + f")"
185
+ f"(({info.prefix}__span_cb)" + ctx.spanCbField(span.index) + f")"
188
186
  )
189
187
 
190
188
  args = [ctx.stateArg(), posField, f"(const char*) {ctx.endPosArg()}"]
@@ -170,7 +170,7 @@ class Load(Field):
170
170
  def doBuild(self, ctx: "Compilation", out: list[str]):
171
171
  out.append(f"return {self.field(ctx)};")
172
172
 
173
-
173
+
174
174
  # BIG ONE
175
175
 
176
176
 
@@ -374,7 +374,6 @@ class Consume(Node):
374
374
  index = ctx.stateField(self.ref.field)
375
375
  ty = ctx.getFieldType(self.ref.field)
376
376
 
377
-
378
377
  if ty == "i64":
379
378
  pass
380
379
  elif ty == "i32":
@@ -412,7 +411,6 @@ class Empty(Node):
412
411
  def __init__(self, ref: _frontend.node.Empty) -> None:
413
412
  self.ref = ref
414
413
  super().__init__(ref)
415
-
416
414
 
417
415
  def doBuild(self, out: list[str]):
418
416
  assert self.ref.otherwise
@@ -884,7 +882,7 @@ class Compilation:
884
882
  def buildStateEnum(self, out: list[str]):
885
883
  # TODO (Vizonex) Give out other names that you could pass as an enum statename
886
884
  # this is incase multiple llparse_state_e states are given to compile
887
- # example would be mixing llhttp with some other source...
885
+ # example would be mixing llhttp with some other source...
888
886
  out.append("enum llparse_state_e {")
889
887
  out.append(f" {STATE_ERROR},")
890
888
  for stateName in self.stateDict.keys():
@@ -992,8 +990,7 @@ class Compilation:
992
990
  out.append(f"{LABEL_PREFIX}{name} : " + "{")
993
991
  for line in lines:
994
992
  out.append(f" {line}")
995
- out.append(" /* UNREACHABLE */;")
996
- out.append(" abort();")
993
+ out.append(" UNREACHABLE;")
997
994
  out.append("}")
998
995
 
999
996
  def buildInternalStates(self, out: list[str]):
@@ -1004,8 +1001,7 @@ class Compilation:
1004
1001
  out.append(f"{LABEL_PREFIX}{name}: " + "{")
1005
1002
  for line in lines:
1006
1003
  out.append(f" {line}")
1007
- out.append(" /* UNREACHABLE */;")
1008
- out.append(" abort();")
1004
+ out.append(" UNREACHABLE;")
1009
1005
  out.append("}")
1010
1006
 
1011
1007
  def addState(self, state: str, lines: list[str]):
@@ -2,6 +2,7 @@
2
2
 
3
3
  from contextlib import contextmanager
4
4
  from typing import Optional
5
+
5
6
  from .frontend import IFrontendResult
6
7
  from .pyfront.front import Match
7
8
  from .pyfront.nodes import Invoke
@@ -208,6 +208,3 @@ class Dot:
208
208
 
209
209
  def escape(self, value: str):
210
210
  return "'" + value.replace("\\", "\\$1").replace('"', "\\$1") + "'"
211
-
212
-
213
- # TODO FIX ALL BUFFERS BACK TO STRINGS!
@@ -0,0 +1,2 @@
1
+ class Error(Exception):
2
+ pass
@@ -4,6 +4,7 @@ from typing import Literal, Optional, Union
4
4
  from .enumerator import Enumerator
5
5
  from .pybuilder import LoopChecker
6
6
  from .pybuilder import builder as source
7
+
7
8
  # from pyfront.namespace import code, node , transform
8
9
  from .pyfront import namespace as _frontend
9
10
  from .pyfront.front import Identifier, IWrap, SpanField
@@ -17,13 +18,10 @@ DEFAULT_MIN_TABLE_SIZE = 32
17
18
  DEFAULT_MAX_TABLE_WIDTH = 4
18
19
 
19
20
 
20
-
21
21
  WrappedNode = IWrap[_frontend.node.Node]
22
22
  WrappedCode = IWrap[_frontend.code.Code]
23
23
 
24
24
 
25
-
26
-
27
25
  @dataclass
28
26
  class ITableLookupTarget:
29
27
  trie: TrieEmpty
@@ -4,8 +4,13 @@ from dataclasses import dataclass
4
4
  from typing import Optional
5
5
 
6
6
  from .C_compiler import CCompiler
7
- from .frontend import (DEFAULT_MAX_TABLE_WIDTH, DEFAULT_MIN_TABLE_SIZE,
8
- Frontend, IImplementation, source)
7
+ from .frontend import (
8
+ DEFAULT_MAX_TABLE_WIDTH,
9
+ DEFAULT_MIN_TABLE_SIZE,
10
+ Frontend,
11
+ IImplementation,
12
+ source,
13
+ )
9
14
  from .header import HeaderBuilder
10
15
 
11
16
 
@@ -1,7 +1,13 @@
1
+ import logging
1
2
  from typing import Any, Literal, Union
2
3
 
4
+ from ..errors import Error
3
5
  from ..pybuilder.main_code import Node
4
6
 
7
+ logger = logging.getLogger("llparse.pybuilder.loopchecker")
8
+ logger.setLevel(logging.INFO)
9
+
10
+
5
11
  MAX_VALUE = 256
6
12
  WORD_SIZE = 32
7
13
  SIZE = MAX_VALUE // WORD_SIZE
@@ -19,17 +25,20 @@ assert MAX_VALUE % WORD_SIZE == 0
19
25
 
20
26
  class Lattice:
21
27
  def __init__(
22
- self, value: Union[Any, list[int], Literal["empty"], Literal["any"]]
28
+ self, value: Union[Any, list[int], bytes, Literal["empty"], Literal["any"]]
23
29
  ) -> None:
24
30
  self.value = value
25
31
  self.words: list[int] = []
26
32
 
27
33
  # allocate space by filling in data with zeros...
28
-
29
- for _ in range(SIZE):
30
- self.words.append(0)
31
-
32
- if len(value) > 1:
34
+ if value != "any":
35
+ for _ in range(SIZE):
36
+ self.words.append(0)
37
+ else:
38
+ for _ in range(SIZE):
39
+ self.words.append(WORD_FILL)
40
+
41
+ if isinstance(value, (list, bytes)):
33
42
  for single in value:
34
43
  self.add(single)
35
44
 
@@ -69,6 +78,9 @@ class Lattice:
69
78
  result.words[i] = self.words[i] & other.words[i]
70
79
  return result
71
80
 
81
+ def __repr__(self):
82
+ return f"<Lattice {', '.join(f'{k}: {v!r}' for k, v in self.__dict__.items())}>"
83
+
72
84
  def subtract(self, other: "Lattice") -> "Lattice":
73
85
  result = Lattice("empty")
74
86
  for i in range(SIZE):
@@ -76,7 +88,13 @@ class Lattice:
76
88
  return result
77
89
 
78
90
  def isEqual(self, other: "Lattice"):
79
- return True if (self.value == other.value) else False
91
+ if self.toJSON() == other.toJSON():
92
+ return True
93
+ else:
94
+ for i in range(SIZE):
95
+ if self.words[i] != other.words[i]:
96
+ return False
97
+ return True
80
98
 
81
99
  def toJSON(self):
82
100
  isEmpty = True
@@ -111,7 +129,9 @@ class Reachability:
111
129
  if otherwise:
112
130
  queue.append(otherwise.node)
113
131
 
114
- return list(res)
132
+ # Reverse the order so that we always
133
+ # throw an error on bad configurations...
134
+ return res
115
135
 
116
136
 
117
137
  EMPTY_VALUE = Lattice("empty")
@@ -124,38 +144,45 @@ class LoopChecker:
124
144
  self.terminatedCache: dict[Node, Lattice] = {}
125
145
 
126
146
  def clear(self, nodes: list[Node]):
127
- self.lattice.update({(node, EMPTY_VALUE) for node in nodes})
147
+ for node in nodes:
148
+ self.lattice[node] = EMPTY_VALUE
128
149
 
129
150
  def check(self, root: Node):
130
151
  r = Reachability()
131
152
  nodes = r.build(root)
153
+
132
154
  for node in nodes:
133
155
  self.clear(nodes)
156
+ logger.debug("checking loops starting from %s" % node.name)
134
157
  self.lattice[node] = ANY_VALUE
158
+ # we must eliminate randomness so that error always throw
135
159
  changed: set[Node] = set([root])
136
- while len(changed) != 0:
137
- next = set()
138
- for changedNode in next:
139
- self.propagate(changedNode, next)
140
- changed = next
141
160
 
142
- self.visit(root, list(changed))
161
+ while changed:
162
+ logger.debug("changed %s" % [n.name for n in changed])
163
+ _next = set()
164
+ for changedNode in changed:
165
+ self.propagate(changedNode, _next)
166
+ changed = _next
167
+ logger.debug("lattice stabilized")
168
+ self.visit(root, [])
143
169
 
144
170
  def propagate(self, node: Node, changed: set[Node]):
145
171
  value = self.lattice[node]
146
- terminated: "Lattice" = self.terminate(node, value, changed)
147
- if terminated.value == EMPTY_VALUE.value:
172
+ terminated = self.terminate(node)
173
+ logger.debug("propagate(%r), initial value %r" % (node.name, value.toJSON()))
174
+ if not terminated.isEqual(EMPTY_VALUE):
175
+ logger.debug("node %s terminates %r" % (node.name, terminated.toJSON()))
148
176
  value = value.subtract(terminated)
149
177
  if value.isEqual(EMPTY_VALUE):
150
178
  return
151
179
 
152
- keysbyTarget: dict[Node, Lattice] = dict()
180
+ keysbyTarget: dict[Node, Lattice] = {}
153
181
 
154
182
  for edge in node.getAllEdges():
155
183
  if not edge.noAdvance:
156
184
  continue
157
185
 
158
- targetValue: Lattice
159
186
  if keysbyTarget.get(edge.node):
160
187
  targetValue = keysbyTarget[edge.node]
161
188
  else:
@@ -170,9 +197,14 @@ class LoopChecker:
170
197
  continue
171
198
 
172
199
  targetValue = targetValue.union(edgeValue)
200
+
173
201
  keysbyTarget[edge.node] = targetValue
174
202
 
175
203
  for child, childValue in keysbyTarget.items():
204
+ logger.debug(
205
+ "node %r propagates %r to %r"
206
+ % (node.name, childValue.toJSON(), child.name)
207
+ )
176
208
  self.update(child, childValue, changed)
177
209
  # FINISHED!
178
210
 
@@ -182,13 +214,13 @@ class LoopChecker:
182
214
  return False
183
215
  self.lattice[node] = newValue
184
216
  changed.add(node)
217
+ return True
185
218
 
186
- def terminate(self, node: Node, value: Lattice, changed: set[Node]):
187
- if self.terminatedCache.get(node):
219
+ def terminate(self, node: Node):
220
+ if node in self.terminatedCache:
188
221
  return self.terminatedCache[node]
189
222
 
190
223
  terminated: list[int] = []
191
-
192
224
  for edge in node.getAllEdges():
193
225
  if edge.noAdvance:
194
226
  continue
@@ -204,43 +236,52 @@ class LoopChecker:
204
236
 
205
237
  def visit(self, node: Node, path: list[Node]):
206
238
  value = self.lattice[node]
239
+ logger.debug("enter %s, value is %s" % (node.name, value.toJSON()))
240
+
207
241
  terminated = (
208
- self.terminatedCache[node]
209
- if self.terminatedCache.get(node)
210
- else EMPTY_VALUE
242
+ EMPTY_VALUE
243
+ if node not in self.terminatedCache
244
+ else self.terminatedCache[node]
211
245
  )
212
- if terminated.isEqual(EMPTY_VALUE):
246
+
247
+ if not terminated.isEqual(EMPTY_VALUE):
248
+ logger.debug(f"subtract terminated {terminated}")
213
249
  value = value.subtract(terminated)
214
250
  if value.isEqual(EMPTY_VALUE):
251
+ logger.debug("terminated everything")
215
252
  return
216
253
 
217
254
  for edge in node.getAllEdges():
218
- if edge.noAdvance:
255
+ if not edge.noAdvance:
219
256
  continue
220
257
  edgeValue = value
221
- if not (not edge.key or isinstance(edge.key, int)):
258
+ if edge.key is None or isinstance(edge.key, int):
259
+ pass
260
+ else:
222
261
  edgeValue = edgeValue.intersect(Lattice([edge.key[0]]))
223
262
 
224
263
  if edgeValue.isEqual(EMPTY_VALUE):
264
+ # logger.debug(edge.node.name + " not recursive")
225
265
  continue
226
266
 
227
- def indexOf(path: list, obj):
228
- try:
229
- return path.index(obj)
230
- except Exception:
231
- return -1
267
+ def indexOf(path: list[Node], obj: Node) -> int:
268
+ for o in path:
269
+ if o.name == obj.name:
270
+ return 0
271
+ return -1
232
272
 
233
- if indexOf(path, node) != -1:
234
- if len(path) == 0:
235
- raise Exception(
236
- f'Detected a loop in "{edge.node.name}" though : {edge.node.name}'
273
+ if indexOf(path, edge.node) != -1:
274
+ if len(path) == 1:
275
+ raise Error(
276
+ f'Detected loop in "{edge.node.name}" through "{edge.node.name}"'
237
277
  )
238
278
 
239
- raise Exception(
279
+ raise Error(
240
280
  'Detected loop in "'
241
281
  + edge.node.name
242
282
  + '" through chain '
243
283
  + (" -> ").join(['"' + name.name + '"' for name in path])
244
284
  )
245
285
 
246
- self.visit(edge.node, path.extend([edge.node]))
286
+ self.visit(edge.node, path + [edge.node])
287
+ logger.debug("leave %s" % node.name)
@@ -1,6 +1,16 @@
1
1
  import re
2
+ import sys
2
3
  from dataclasses import dataclass
3
- from typing import Optional, TypeVar, Union
4
+ from typing import Callable, Literal, Optional, TypeVar, Union
5
+
6
+ if sys.version_info < (3, 10):
7
+ from typing_extensions import ParamSpec
8
+ else:
9
+ from typing import ParamSpec
10
+
11
+ _P = ParamSpec("_P")
12
+ _T = TypeVar("_T")
13
+
4
14
 
5
15
  Signature = ["match", "value"]
6
16
 
@@ -18,7 +28,7 @@ def toBuffer(value: Union[str, int]):
18
28
  # TODO Add text validataion...
19
29
 
20
30
 
21
- def validate_text(init):
31
+ def validate_text(init: Callable[_P, _T]) -> Callable[_P, _T]:
22
32
  def is_valid(args, kwargs):
23
33
  if kwargs.get("field"):
24
34
  field = kwargs["field"]
@@ -32,16 +42,20 @@ def validate_text(init):
32
42
 
33
43
 
34
44
  class Code:
35
- def __init__(self, signature: str, name: str) -> None:
36
- assert signature in Signature
45
+ def __init__(self, signature: Literal["match", "value"], name: str) -> None:
46
+ assert signature in Signature, "Invalid signature %s" % signature
37
47
 
38
48
  self.signature = signature
39
49
  self.name = name
50
+
40
51
  def __hash__(self):
41
52
  return hash(self.signature + self.name)
42
53
 
54
+
43
55
  class Field(Code):
44
- def __init__(self, signature: str, name: str, field: str) -> None:
56
+ def __init__(
57
+ self, signature: Literal["match", "value"], name: str, field: str
58
+ ) -> None:
45
59
  self.field = field
46
60
  # if re.search(r"[//\s\\]+",field):
47
61
  # raise TypeError(f"Can\'t access internal field from user code because the field: {name} conatins invalid characters")
@@ -49,11 +63,10 @@ class Field(Code):
49
63
 
50
64
 
51
65
  class FieldValue(Field):
52
- # NOTE I Added for typehinting Here as it refuses to show up on the ide I'm using (Vizonex)
53
-
54
- def __init__(self, signature: str, name: str, field: str, value: int) -> None:
66
+ def __init__(
67
+ self, signature: Literal["match", "value"], name: str, field: str, value: int
68
+ ) -> None:
55
69
  self.value = value
56
- self.field = field
57
70
  super().__init__(signature, name, field)
58
71
 
59
72
 
@@ -132,6 +145,10 @@ class Node:
132
145
  self.otherwiseEdge: Optional["Edge"] = None
133
146
  self.privEdges: list["Edge"] = []
134
147
 
148
+ def key(self):
149
+ """reversed for sorting to prevent python from creating artificial randomness"""
150
+ return self.name
151
+
135
152
  def __hash__(self) -> int:
136
153
  return hash(self.name)
137
154
 
@@ -289,7 +306,7 @@ class Edge:
289
306
  self.node = node
290
307
  self.noAdvance = noAdvance
291
308
 
292
- self.key = key
309
+ self.key = key.encode() if isinstance(key, str) else key
293
310
  self.value = value
294
311
 
295
312
  # Validation...
@@ -1,5 +1,4 @@
1
-
2
- from main_code import Edge, Node
1
+ from ..pybuilder.main_code import Edge, Node
3
2
 
4
3
 
5
4
  class ParserMap:
@@ -9,9 +8,7 @@ class ParserMap:
9
8
  def Jsonize(self):
10
9
  queue = [self.root]
11
10
  seen: set[Node] = set()
12
-
13
-
14
- while len(queue) != 0:
11
+ while queue:
15
12
  node = queue.pop()
16
13
 
17
14
  if node in seen:
@@ -33,5 +30,4 @@ class ParserMap:
33
30
  return {}
34
31
  data = edge.__dict__
35
32
  data["node"] = edge.node
36
- # print(data["node"].name)
37
33
  return data
@@ -36,6 +36,7 @@ class Code:
36
36
  def __hash__(self):
37
37
  return hash(self.cacheKey)
38
38
 
39
+
39
40
  class External(Code):
40
41
  """Inherits from the `Code` class as a subclass of `Code`"""
41
42
 
@@ -46,8 +47,9 @@ class External(Code):
46
47
  @dataclass
47
48
  class Field(Code):
48
49
  """Inherits from `Code`"""
50
+
49
51
  field: str
50
-
52
+
51
53
  def __hash__(self):
52
54
  return hash(self.cacheKey)
53
55
 
@@ -34,6 +34,7 @@ class IUniqueName:
34
34
  # def __hash__(self):
35
35
  # return hash(self.name)
36
36
 
37
+
37
38
  @dataclass
38
39
  class IOtherwiseEdge:
39
40
  node: IWrap["Node"]
@@ -86,6 +87,7 @@ class Node:
86
87
  def __hash__(self):
87
88
  return hash(self.id)
88
89
 
90
+
89
91
  class Consume(Node):
90
92
  def __init__(self, id: IUniqueName, field: str) -> None:
91
93
  self.field = field
@@ -179,7 +181,7 @@ class SpanStart(Node):
179
181
  self.field = field
180
182
  self.callback = callback
181
183
  super().__init__(id)
182
-
184
+
183
185
 
184
186
  class SpanEnd(Node):
185
187
  def __init__(
@@ -25,7 +25,6 @@ SPAN_CALLBACK_MAYBE = "SPAN_CALLBACK_MAYBE"
25
25
  MID_POSTFIX = "_settings"
26
26
 
27
27
 
28
-
29
28
  @dataclass
30
29
  class ApiResult:
31
30
  header: str
@@ -1,22 +1,10 @@
1
+ from collections import OrderedDict
1
2
  from dataclasses import dataclass, field
2
3
  from typing import Union
3
4
 
5
+ from .errors import Error
4
6
  from .pybuilder.main_code import Node, Reachability, Span, SpanEnd, SpanStart
5
7
 
6
-
7
- class DeadLoop(Exception):
8
- """Thrown when this type of loop is detected during complation
9
- ```c
10
- switch(*p){
11
- case dead_loop:
12
- dead_loop : {
13
- goto dead_loop;
14
- } /* Rest in Peace Computer x_x */
15
- }
16
-
17
- ```"""
18
-
19
-
20
8
  SpanSet = set[Span]
21
9
 
22
10
 
@@ -72,17 +60,15 @@ class SpanAllocator:
72
60
  # print("SPAN:%s NODE:%s" % (edgeSpans,edge.node.__dict__))
73
61
  for subSpan in edgeSpans:
74
62
  if subSpan not in spans:
75
- raise AssertionError(
76
- f'Unmatched span end for "{subSpan.callback.name}"'
77
- / f'at "{edge.node.name}", coming from "{node.name}"'
63
+ raise Error(
64
+ f'unmatched span end for "{subSpan.callback.name}"'
65
+ f'at "{edge.node.name}", coming from "{node.name}"'
78
66
  )
79
67
 
80
68
  if isinstance(edge.node, SpanEnd):
81
69
  span = _id(edge.node)
82
70
  if span not in spans:
83
- raise AssertionError(
84
- f'Unmatched span end for "{span.callback.name}"'
85
- )
71
+ raise Error(f'unmatched span end for "{span.callback.name}"')
86
72
 
87
73
  def computeActive(self, nodes: list[Node]):
88
74
  activeMap: dict[Node, SpanSet] = dict()
@@ -111,7 +97,7 @@ class SpanAllocator:
111
97
 
112
98
  if isinstance(edgeNode, SpanStart):
113
99
  if _id(edgeNode) == span:
114
- raise DeadLoop(
100
+ raise Error(
115
101
  f'Detected loop in span {span.callback.name} at "{node.name}"'
116
102
  )
117
103
 
@@ -136,14 +122,14 @@ class SpanAllocator:
136
122
  return overlap
137
123
 
138
124
  def _allocate(self, span: Span):
139
- if self._colors.get(span):
125
+ if span in self._colors:
140
126
  return self._colors[span]
141
127
 
142
128
  overlap = self._overlapMap[span]
143
129
 
144
130
  used: set[int] = set()
145
131
  for subSpan in overlap:
146
- if self._colors.get(subSpan):
132
+ if subSpan in self._colors:
147
133
  used.add(self._colors.get(subSpan))
148
134
  i = 0
149
135
  while i in used:
@@ -160,17 +146,16 @@ class SpanAllocator:
160
146
 
161
147
  self._overlapMap = overlapDict
162
148
 
163
- Map = {span: self._allocate(span) for span in spans}
149
+ colors = {span: self._allocate(span) for span in spans}
164
150
 
165
151
  concurrency = list()
166
152
  for _ in range(self._mx + 1):
167
153
  # NOTE : concurrency[i] = [] doesn't work but this does :P
168
154
  concurrency.append([])
169
155
 
170
- for s in spans:
156
+ for s in sorted(spans, key=lambda s: s.callback.name):
171
157
  concurrency[self._allocate(s)].append(s)
172
-
173
- return ISpanAllocatorResult(Map, concurrency, self._mx)
158
+ return ISpanAllocatorResult(colors, concurrency, self._mx)
174
159
 
175
160
 
176
161
  # TODO (Vizonex) Use Indutny's Mini Http parser to help with testing ours to verify that ours is correct...
@@ -5,6 +5,7 @@ from .pybuilder import Builder
5
5
 
6
6
  # TODO: Remove and make pytest for it.
7
7
 
8
+
8
9
  def smaller_test():
9
10
  # from compilator import Compilation, ICompilerOptions
10
11
  p = Builder()
@@ -18,7 +18,6 @@ class IEdge:
18
18
  return self.key < object.key
19
19
 
20
20
 
21
-
22
21
  @dataclass
23
22
  class TrieNode:
24
23
  """Mainly Used as an Abstract Object for typing"""
@@ -151,15 +150,3 @@ class Trie:
151
150
  children.append(child)
152
151
 
153
152
  return TrieSingle(children, otherwise)
154
-
155
-
156
- # def test():
157
- # t = b"data-to-buffer"
158
-
159
- # for _ in range(2):
160
- # t = t[1:4]
161
- # print(t)
162
-
163
-
164
- # if __name__ == "__main__":
165
- # test()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llparse
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: A Parody of llparse written for writing C Parsers with Python
5
5
  Author-email: Vizonex <VizonexBusiness@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -9,13 +9,13 @@ llparse/cython_builder.py
9
9
  llparse/debug.py
10
10
  llparse/dot.py
11
11
  llparse/enumerator.py
12
+ llparse/errors.py
12
13
  llparse/frontend.py
13
14
  llparse/header.py
14
15
  llparse/llparse.py
15
16
  llparse/settings.py
16
17
  llparse/spanalloc.py
17
18
  llparse/test.py
18
- llparse/tire.py
19
19
  llparse/trie.py
20
20
  llparse.egg-info/PKG-INFO
21
21
  llparse.egg-info/SOURCES.txt
@@ -32,4 +32,6 @@ llparse/pyfront/implementation.py
32
32
  llparse/pyfront/namespace.py
33
33
  llparse/pyfront/nodes.py
34
34
  llparse/pyfront/peephole.py
35
- llparse/pyfront/transform.py
35
+ llparse/pyfront/transform.py
36
+ tests/test_loop_checker.py
37
+ tests/test_span_allocator.py
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "llparse"
3
- version = "0.1.1"
3
+ version = "0.1.2"
4
4
  description = "A Parody of llparse written for writing C Parsers with Python"
5
5
  readme = "README.md"
6
6
  authors = [
@@ -0,0 +1,160 @@
1
+ from llparse.pybuilder import LoopChecker, Builder
2
+ from llparse.errors import Error
3
+ import pytest
4
+
5
+ # based off llparse-builder/test/loop-checker.test.ts
6
+ # Time Taken: 7 hours if you count the hard amounts of debugging, I went through.
7
+
8
+
9
+ @pytest.fixture()
10
+ def loop_checker() -> tuple[LoopChecker, Builder]:
11
+ return LoopChecker(), Builder()
12
+
13
+
14
+ def test_detect_shallow_loops(loop_checker: tuple[LoopChecker, Builder]) -> None:
15
+ lc, b = loop_checker
16
+ start = b.node("start")
17
+ start.otherwise(start)
18
+ with pytest.raises(Error, match=r'Detected loop in "start" through "start"'):
19
+ lc.check(start)
20
+
21
+
22
+ def test_detect_loops(loop_checker: tuple[LoopChecker, Builder]) -> None:
23
+ lc, b = loop_checker
24
+ start = b.node("start")
25
+ a = b.node("a")
26
+ invoke = b.invoke(
27
+ b.code.match("nop"),
28
+ {
29
+ 0: start,
30
+ },
31
+ b.error(1, "error"),
32
+ )
33
+
34
+ start.peek("a", a).otherwise(b.error(1, "error"))
35
+
36
+ a.otherwise(invoke)
37
+ with pytest.raises(Error, match=r'Detected loop in "a".*"a" -> "invoke_nop"'):
38
+ lc.check(start)
39
+
40
+
41
+ def test_detect_shallow_loops_2(loop_checker: tuple[LoopChecker, Builder]) -> None:
42
+ lc, b = loop_checker
43
+ start = b.node("start")
44
+ loop = b.node("loop")
45
+
46
+ start.peek("a", loop).otherwise(b.error(1, "error"))
47
+ loop.match("a", loop).otherwise(loop)
48
+ with pytest.raises(Error, match=r'Detected loop in "loop" through "loop"'):
49
+ lc.check(loop)
50
+
51
+
52
+ def test_ignore_loops_through_peek_to_match(
53
+ loop_checker: tuple[LoopChecker, Builder],
54
+ ) -> None:
55
+ lc, b = loop_checker
56
+ start = b.node("start")
57
+ a = b.node("a")
58
+ invoke = b.invoke(
59
+ b.code.match("nop"),
60
+ {
61
+ 0: start,
62
+ },
63
+ b.error(1, "error"),
64
+ )
65
+
66
+ start.peek("a", a).otherwise(b.error(1, "error"))
67
+
68
+ a.match("abc", invoke).otherwise(start)
69
+ lc.check(start)
70
+
71
+
72
+ def test_ignore_irrelevant_peeks(loop_checker: tuple[LoopChecker, Builder]) -> None:
73
+ lc, b = loop_checker
74
+ start = b.node("start")
75
+ a = b.node("a")
76
+
77
+ start.peek("a", a).otherwise(b.error(1, "error"))
78
+
79
+ a.peek("b", start).otherwise(b.error(1, "error"))
80
+ lc.check(start)
81
+
82
+
83
+ def test_ignore_loops_with_multi_peek_match(
84
+ loop_checker: tuple[LoopChecker, Builder],
85
+ ) -> None:
86
+ lc, b = loop_checker
87
+ start = b.node("start")
88
+ another = b.node("another")
89
+
90
+ NUM: list[str] = [
91
+ "0",
92
+ "1",
93
+ "2",
94
+ "3",
95
+ "4",
96
+ "5",
97
+ "6",
98
+ "7",
99
+ "8",
100
+ "9",
101
+ ]
102
+
103
+ ALPHA: list[str] = [
104
+ "a",
105
+ "b",
106
+ "c",
107
+ "d",
108
+ "e",
109
+ "f",
110
+ "g",
111
+ "h",
112
+ "i",
113
+ "j",
114
+ "k",
115
+ "l",
116
+ "m",
117
+ "n",
118
+ "o",
119
+ "p",
120
+ "q",
121
+ "r",
122
+ "s",
123
+ "t",
124
+ "u",
125
+ "v",
126
+ "w",
127
+ "x",
128
+ "y",
129
+ "z",
130
+ "A",
131
+ "B",
132
+ "C",
133
+ "D",
134
+ "E",
135
+ "F",
136
+ "G",
137
+ "H",
138
+ "I",
139
+ "J",
140
+ "K",
141
+ "L",
142
+ "M",
143
+ "N",
144
+ "O",
145
+ "P",
146
+ "Q",
147
+ "R",
148
+ "S",
149
+ "T",
150
+ "U",
151
+ "V",
152
+ "W",
153
+ "X",
154
+ "Y",
155
+ "Z",
156
+ ]
157
+ start.match(ALPHA, start).peek(NUM, another).skipTo(start)
158
+
159
+ another.match(NUM, another).otherwise(start)
160
+ lc.check(start)
@@ -0,0 +1,121 @@
1
+ from llparse.spanalloc import SpanAllocator
2
+ from llparse.pybuilder import Builder
3
+ from llparse.errors import Error
4
+
5
+ import pytest
6
+
7
+ # Brought over and translated from llparse-builder/tests/span-allocator.ts
8
+
9
+
10
+ @pytest.fixture()
11
+ def span_alloc() -> tuple[SpanAllocator, Builder]:
12
+ return SpanAllocator(), Builder()
13
+
14
+
15
+ def test_allocate_single_span(span_alloc: tuple[SpanAllocator, Builder]) -> None:
16
+ sa, b = span_alloc
17
+ span = b.span(b.code.span("span"))
18
+ start = b.node("start")
19
+ body = b.node("body")
20
+
21
+ start.otherwise(span.start(body))
22
+
23
+ body.skipTo(span.end(start))
24
+
25
+ res = sa.allocate(start)
26
+
27
+ assert res.max == 0
28
+
29
+ assert len(res.concurrency) == 1
30
+ assert span in res.concurrency[0]
31
+
32
+ assert len(res.colors) == 1
33
+ assert res.colors.get(span) == 0
34
+
35
+
36
+ def test_allocate_overlapping_spans(span_alloc: tuple[SpanAllocator, Builder]) -> None:
37
+ sa, b = span_alloc
38
+ span1 = b.span(b.code.span("span1"))
39
+ span2 = b.span(b.code.span("span2"))
40
+
41
+ start = b.node("start")
42
+ body1 = b.node("body1")
43
+ body2 = b.node("body2")
44
+
45
+ start.otherwise(span1.start(body1))
46
+
47
+ body1.otherwise(span2.start(body2))
48
+
49
+ body2.skipTo(span2.end(span1.end(start)))
50
+
51
+ res = sa.allocate(start)
52
+
53
+ # TODO: fix it later... it's supposed to be 1
54
+ assert res.max == 1
55
+
56
+ assert len(res.concurrency) == 2
57
+
58
+ # python loves to shuffle things on me :/ but both exist nevertheless
59
+ assert span2 in res.concurrency[0] or span1 in res.concurrency[0]
60
+ assert span1 in res.concurrency[1] or span2 in res.concurrency[1]
61
+
62
+ assert len(res.colors) == 2
63
+ assert res.colors.get(span2) in [0, 1]
64
+ assert res.colors.get(span1) in [0, 1]
65
+
66
+
67
+ def test_allocate_non_overlapping_spans(
68
+ span_alloc: tuple[SpanAllocator, Builder],
69
+ ) -> None:
70
+ sa, b = span_alloc
71
+ span1 = b.span(b.code.span("span1"))
72
+ span2 = b.span(b.code.span("span2"))
73
+
74
+ start = b.node("start")
75
+ body1 = b.node("body1")
76
+ body2 = b.node("body2")
77
+
78
+ start.match("a", span1.start(body1)).otherwise(span2.start(body2))
79
+ body1.skipTo(span1.end(start))
80
+
81
+ body2.skipTo(span2.end(start))
82
+
83
+ res = sa.allocate(start)
84
+
85
+ assert res.max == 0
86
+
87
+ assert len(res.concurrency) == 1
88
+ assert span1 in res.concurrency[0]
89
+ assert span2 in res.concurrency[0]
90
+
91
+ assert len(res.colors) == 2
92
+ assert res.colors.get(span1) == 0
93
+ assert res.colors.get(span2) == 0
94
+
95
+
96
+ def test_should_throw_on_loops(span_alloc: tuple[SpanAllocator, Builder]) -> None:
97
+ sa, b = span_alloc
98
+ start = b.node("start")
99
+ end = b.node("end")
100
+ span = b.span(b.code.span("on_data"))
101
+
102
+ start.match("a", end).match("b", span.start(end)).otherwise(b.error(1, "error"))
103
+
104
+ end.otherwise(span.end(start))
105
+
106
+ with pytest.raises(Error, match=r"unmatched.*on_data"):
107
+ sa.allocate(start)
108
+
109
+ def test_propagate_through_invoke_map(span_alloc:tuple[SpanAllocator, Builder]):
110
+ sa, b = span_alloc
111
+ start = b.node('start')
112
+ span = b.span(b.code.span('llparse__on_data'))
113
+
114
+ b.property('i8', 'custom')
115
+
116
+ start.otherwise(b.invoke(b.code.load('custom'), {
117
+ 0: span.end().skipTo(start),
118
+ }, span.end().skipTo(start)))
119
+
120
+ sa.allocate(span.start(start))
121
+
@@ -1,158 +0,0 @@
1
- from .pybuilder.main_code import Node, Edge
2
- from typing import Optional
3
-
4
- from dataclasses import dataclass
5
-
6
- @dataclass
7
- class IEdge:
8
- # NOTE THIS SHOULD BE STRICTLY BYTES !!!
9
- key: bytes
10
- node: Node
11
- noAdvance:bool
12
- value:Optional[int] = None
13
-
14
- def __lt__(self,object):
15
- return self.key < object.key
16
-
17
- # TODO RENAME TO TRIE!
18
-
19
- class TireNode:
20
- """Mainly Used as an Abstract Object for typing"""
21
- def __init__(self) -> None:
22
- return
23
-
24
- class TireSequence(TireNode):
25
- def __init__(self,select:bytes,child:TireNode) -> None:
26
- self.select = select
27
- self.child = child
28
-
29
-
30
- class ITireSingleChild:
31
- def __init__(self,key:int,noAdvance:bool,node:TireNode) -> None:
32
- self.key = key
33
- self.noAdvance = noAdvance
34
- self.node = node
35
-
36
- class TireEmpty(TireNode):
37
- def __init__(self,node:Node,value:int) -> None:
38
- self.node = node
39
- self.value = value
40
-
41
- class TireSingle(TireNode):
42
- def __init__(self,children:list[ITireSingleChild],otherwise:Optional[TireEmpty] = None) -> None:
43
- self.children = children
44
- self.otherwise = otherwise
45
-
46
-
47
-
48
-
49
-
50
- # TODO Retry making Tire....
51
-
52
- class Tire:
53
- def __init__(self,name:str) -> None:
54
- self.name = name
55
-
56
- def build(self,edges:list[Edge]):
57
- if len(edges) == 0:
58
- return None
59
-
60
- internalEdges: list[IEdge] = []
61
-
62
- for edge in edges:
63
- key = str(edge.key) if isinstance(edge.key,int) else edge.key
64
- internalEdges.append(IEdge(key.encode("utf-8") if isinstance(key,str) else key ,edge.node,edge.noAdvance,edge.value))
65
- return self.level(internalEdges,[])
66
-
67
- def level(self,edges:list[IEdge],path:list[bytes]):
68
- first = edges[0].key
69
- last = edges[-1].key
70
-
71
- if len(edges) == 1 and len(edges[0].key) == 0:
72
- return TireEmpty(edges[0].node,edges[0].value)
73
-
74
- i = 0
75
- for i in range(len(first)):
76
- if first[i] != last[i]:
77
- break
78
-
79
- if i > 1:
80
- # NOTE: I think Indutny intended for these sequences
81
- # to advance; otherwise, not having this would cause a recursion error
82
- # This is why first[1:i] is used and not first[0:count] like in typescript...
83
- return self.sequence(edges,first[: i + 1 ],path)
84
-
85
- return self.single(edges,path)
86
-
87
- def Slice(self,edges:list[IEdge],off:int):
88
- slice = [IEdge(edge.key[off:],edge.node,edge.noAdvance,edge.value) for edge in edges]
89
- return sorted(slice, key = lambda k: k.key)
90
-
91
-
92
- def sequence(self,edges:list[IEdge],prefix:bytes,path:list[bytes]):
93
- sliced = self.Slice(edges,len(prefix))
94
- assert not any([edge.noAdvance for edge in edges])
95
- child = self.level(sliced,path + [prefix])
96
- return TireSequence(prefix,child)
97
-
98
- def single(self,edges:list[IEdge],path:list[bytes]):
99
-
100
- if len(edges[0].key) == 0:
101
- if len(path) == 0:
102
- AssertionError(f'Empty root entry at "{self.name}"')
103
- if not (len(edges) == 1 or len(edges[1].key) != 0):
104
- err = f'Duplicate entries in "{self.name}" at: [' + (b", ".join(path).decode("utf-8")) + ']'
105
-
106
- raise AssertionError(err)
107
-
108
-
109
- keys : dict[int,list[IEdge]] = {}
110
-
111
- for edge in edges:
112
- if len(edge.key) == 0:
113
- otherwise = TireEmpty(edge.node,edge.value)
114
- continue
115
-
116
- key = edge.key[0]
117
-
118
- if keys.get(key):
119
- keys[key].append(edge)
120
- else:
121
- keys[key] = [edge]
122
-
123
-
124
- otherwise = None
125
- children : list[ITireSingleChild] = []
126
-
127
- for key, subEdges in keys.items():
128
- # TODO: log the function's arguments to determine whether or not it's the problem...
129
- # I think this may be the problem, now that I think about it...
130
- sliced = self.Slice(subEdges, 1)
131
-
132
-
133
- subPath = path + [chr(key).encode("utf-8")]
134
-
135
- noAdvance = any([e.noAdvance for e in subEdges])
136
- allSame = all([e.noAdvance == noAdvance for e in subEdges])
137
-
138
- if not (allSame or len(subEdges) == 0):
139
- err = f'Conflicting `.peek` and `.match` entries in "{self.name}" at: [' + (b", ".join(subPath).decode("utf-8")) + ']'
140
- raise TypeError(err)
141
- child = ITireSingleChild(key,noAdvance,self.level(sliced,subPath))
142
- children.append(child)
143
-
144
-
145
- return TireSingle(children,otherwise)
146
-
147
-
148
-
149
-
150
- def test():
151
- t = b"data-to-buffer"
152
-
153
- for _ in range(2):
154
- t = t[1:4]
155
- print(t)
156
-
157
- if __name__ == "__main__":
158
- test()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,3 +1,3 @@
1
- from ..pyfront import transform as transform
2
- from ..pyfront import nodes as node
3
1
  from ..pyfront import front as code
2
+ from ..pyfront import nodes as node
3
+ from ..pyfront import transform as transform
File without changes