pPEGpy 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pPEGpy/__init__.py ADDED
@@ -0,0 +1,2 @@
1
def hello() -> str:
    """Return the package greeting string."""
    greeting = "Hello from ppegpy!"
    return greeting
pPEGpy/peg.py ADDED
@@ -0,0 +1,766 @@
1
+ # pPEGpy -- run with Python 3.10+ 2025-05-18
2
+
3
+ from __future__ import annotations # parser() has a forward ref to Code as type
4
+
5
+ import array
6
+
7
+
8
+ # -- pPEG grammar ------------------------------------------------------------
9
+
10
+ peg_grammar = R"""
11
+ Peg = _ rule+
12
+ rule = id _ def _ alt
13
+ def = [:=]+
14
+ alt = seq ('/' _ seq)*
15
+ seq = rep+
16
+ rep = pre sfx? _
17
+ pre = pfx? term
18
+ term = call / quote / class / dot / group / extn
19
+ group = '(' _ alt ')'
20
+ call = id _ !def
21
+ id = [a-zA-Z_] [a-zA-Z0-9_]*
22
+ pfx = [~!&]
23
+ sfx = [+?] / '*' nums?
24
+ nums = min ('..' max)?
25
+ min = [0-9]+
26
+ max = [0-9]*
27
+ quote = ['] ~[']* ['] 'i'?
28
+ class = '[' ~']'* ']'
29
+ dot = '.'_
30
+ extn = '<' ~'>'* '>'
31
+ _ = ([ \t\n\r]+ / '#' ~[\n\r]*)*
32
+ """
33
+
34
+ # -- Parse context for parser run function -----------------------------------
35
+
36
+
37
class Parse:
    """Parse state and flat parse tree for one run of a Code object over an input.

    The parse tree is stored as four parallel arrays with one entry per node
    (see ``idents``, ``sizes``, ``starts`` and ``ends`` below).  ``str(parse)``
    gives the tree diagram when ``ok`` is true, otherwise an error report.
    """

    def __init__(self, code: Code, input: str):
        self.ok = True
        self.code = code
        self.input = input
        self.pos = 0
        self.end = len(input)

        # parse tree arrays -- L = 4 Bytes, H = 2 Bytes, B = 1 Byte
        self.idents = array.array("H")  # rule ident: <<id:12, type:4>>
        self.sizes = array.array("L")  # node shape: <<size:24, depth:8>>
        self.starts = array.array("L")  # input start index
        self.ends = array.array("L")  # input end index

        # run state...
        self.anon = False  # True when running anon rules
        self.deep = 0  # tree depth, deep to avoid name conflict with self.depth()
        self.max_depth = 255  # catch left recursion

        # faults...
        self.max_pos = -1  # peak fail
        self.first = -1  # node at max pos failure
        self.top = -1  # parent of first node
        self.end_pos = -1  # fell short end pos

    def __str__(self):
        """Return the tree diagram for a good parse, else the error report."""
        if self.ok:
            return show_tree(self)
        return err_report(self)

    def name(self, i):
        """Return the rule name of node `i` (the id is the ident above the low 4 bits)."""
        ident = self.idents[i]  # [id:0xFFF|type:0xF]
        return self.code.names[ident >> 4]

    def text(self, i):
        """Return the slice of the input matched by node `i`."""
        return self.input[self.starts[i] : self.ends[i]]

    def size(self, i):
        """Return the node count stored for node `i` (the bits above the depth byte)."""
        return self.sizes[i] >> 8

    def depth(self, i):
        """Return the tree depth stored in the low byte of node `i`'s shape."""
        return self.sizes[i] & 0xFF

    def leaf(self, i):
        """Return True when the low 4 ident bits of node `i` equal TERM."""
        return self.idents[i] & 0xF == TERM

    def tree(self):
        """Return the first top-level node as a nested ptree list, or [] if none."""
        ptree = p_tree(self, 0, len(self.ends))
        if not ptree:
            return []
        return ptree[0]

    def itree(self):
        """Return the first top-level node as a nested [name, start, end, args] list, or []."""
        itree = i_tree(self, 0, len(self.ends))
        if not itree:
            return []
        return itree[0]

    def dump(self, filter=1):
        """Print a debug dump of the node arrays (delegates to dump_tree)."""
        return dump_tree(self, filter)

    def transform(self, i=0, j=-1, **fns):
        """Apply the keyword functions `fns` (keyed by rule name) to nodes i..j.

        When `j` is negative it defaults to the end of the subtree rooted at
        node `i`.  That end is `i + size(i)`; using `size(i)` alone is only
        correct for `i == 0`.
        """
        if j < 0:
            j = i + self.size(i)
        return transformer(self, i, j, fns)
105
+
106
+
107
+ # -- the parser function itself -------------------
108
+
109
+
110
def parser(code: Code, input: str) -> Parse:
    """Run `code` over `input` starting at rule 0 and return the Parse.

    The result has ok == False when the code itself is not ok, when the run
    fails, or when the run stops before the end of the input (end_pos then
    records where it stopped).  A good parse is pruned before it is returned.
    """
    parse = Parse(code, input)
    if not code.ok:
        parse.ok = False
        return parse
    matched = run(parse, ["id", 0])
    fell_short = matched and parse.pos < len(parse.input)
    if fell_short:
        parse.end_pos = parse.pos
    parse.ok = False if fell_short else matched
    if parse.ok:
        prune_tree(parse)  # delete failures and redundant heads
    return parse
123
+
124
+
125
+ # -- the run engine that does all the work ----------------------------
126
+
127
+
128
+ def run(parse: Parse, expr: list):
129
+ match expr:
130
+ case ["id", idx]:
131
+ # execute anon ids....
132
+ if parse.anon:
133
+ return run(parse, parse.code.codes[idx])
134
+ defx = parse.code.defs[idx]
135
+ if defx == ANON:
136
+ parse.anon = True
137
+ ok = run(parse, parse.code.codes[idx])
138
+ parse.anon = False
139
+ return ok
140
+
141
+ # all other ids.............
142
+ pos = parse.pos
143
+ depth = parse.deep
144
+ parse.deep += 1
145
+ if parse.deep > parse.max_depth:
146
+ raise SystemExit(f"*** run away recursion, in: {parse.code.names[idx]}")
147
+
148
+ # parse tree array - enter node ------------
149
+ index = len(parse.starts)
150
+ parse.starts.append(pos)
151
+ parse.idents.append((idx << 4) | defx)
152
+ parse.ends.append(0) # assign at index after run
153
+ parse.sizes.append(0) # assign at index after run
154
+
155
+ # -- run -----------------------
156
+ rule = parse.code.codes[idx]
157
+ ok = run(parse, rule) # ok = True/False
158
+ # ------------------------------
159
+
160
+ if not ok and parse.pos >= parse.max_pos:
161
+ parse.top = index # parent of peak failure
162
+ if parse.pos > parse.max_pos:
163
+ parse.max_pos = parse.pos
164
+ parse.first = index # root of peak failure
165
+
166
+ # parse tree ---------------
167
+ parse.ends[index] = parse.pos
168
+ size = len(parse.ends) - index
169
+ parse.sizes[index] = (size << 8) | depth
170
+ if not ok:
171
+ parse.idents[index] |= FAIL # fail flag
172
+
173
+ parse.deep -= 1
174
+ return ok
175
+
176
+ case ["alt", list]:
177
+ pos = parse.pos
178
+ max = pos
179
+ for x in list:
180
+ if run(parse, x):
181
+ if parse.pos > pos: # treat empty match as failure
182
+ return True
183
+ if parse.pos > pos:
184
+ max = pos
185
+ parse.pos = pos # reset (essential)
186
+ parse.pos = max # to be caught in id
187
+ return False
188
+
189
+ case ["seq", list]:
190
+ for i, x in enumerate(list):
191
+ if not run(parse, x):
192
+ return False
193
+ return True
194
+
195
+ case ["rept", min, max, exp]:
196
+ pos = parse.pos
197
+ if not run(parse, exp):
198
+ if min == 0:
199
+ parse.pos = pos # reset
200
+ return True # * ?
201
+ return False # +
202
+ if max == 1:
203
+ return True # ?
204
+ count = 1
205
+ pos1 = parse.pos
206
+ while True:
207
+ result = run(parse, exp)
208
+ if parse.pos == pos1:
209
+ break
210
+ if not result:
211
+ parse.pos = pos1 # reset loop last try
212
+ break
213
+ pos1 = parse.pos
214
+ count += 1
215
+ if count == max:
216
+ break
217
+ if min > 0 and count < min:
218
+ return False
219
+ return True
220
+
221
+ case ["pred", op, term]: # !x &x
222
+ pos = parse.pos
223
+ result = run(parse, term)
224
+ parse.pos = pos # reset
225
+ if op == "!":
226
+ return not result
227
+ return result
228
+
229
+ case ["neg", term]: # ~x
230
+ if parse.pos >= parse.end:
231
+ return False
232
+ pos = parse.pos
233
+ result = run(parse, term)
234
+ parse.pos = pos # reset
235
+ if result:
236
+ return False
237
+ parse.pos += 1
238
+ return True
239
+
240
+ case ["quote", str, i]:
241
+ for ch in str: # 'abc' compiler strips quotes
242
+ if parse.pos >= parse.end:
243
+ return False
244
+ char = parse.input[parse.pos]
245
+ if i:
246
+ char = char.upper()
247
+ if char != ch:
248
+ return False
249
+ parse.pos += 1
250
+ return True
251
+
252
+ case ["class", chars]:
253
+ if parse.pos >= parse.end:
254
+ return False
255
+ char = parse.input[parse.pos]
256
+ max = len(chars) - 1 # eg [a-z0-9_]
257
+ i = 1
258
+ while i < max:
259
+ a = chars[i]
260
+ if i + 2 < max and chars[i + 1] == "-":
261
+ if char >= a and char <= chars[i + 2]:
262
+ parse.pos += 1
263
+ return True
264
+ i += 3
265
+ else:
266
+ if char == a:
267
+ parse.pos += 1
268
+ return True
269
+ i += 1
270
+ return False
271
+
272
+ case ["dot"]:
273
+ if parse.pos >= parse.end:
274
+ return False
275
+ parse.pos += 1
276
+ return True
277
+
278
+ case ["extn", chars]: # TODO compile into function call
279
+ args = chars[1:-1].split()
280
+ extra = parse.code.extras.get(args[0], None)
281
+ if extra:
282
+ return extra(parse, args)
283
+ print(f"extn args: {args}")
284
+ return False
285
+
286
+ case ["ext", fn]:
287
+ return fn(parse)
288
+
289
+ case _:
290
+ raise Exception("*** crash: run: undefined expression...")
291
+
292
+
293
+ # -- prune parse tree -- removes failures and redundant nodes -------------------
294
+
295
+ # failures are included in the parse tree to help with debug and fault reporting
296
+ # it is more efficient to delete redundant nodes as a separate pass at the end..
297
+
298
+
299
def prune_tree(parse):
    """Compact the parse tree arrays in place, then drop the unused tail.

    Two passes of prune(): step 1 deletes failures, step 2 deletes redundant
    nodes.  Each pass returns the number of nodes it kept.
    """
    # These were too tricky for me to combine! So two steps...
    _, kept = prune(parse, 1, 0, len(parse.ends), 0, 0)  # step 1 delete failures
    _, kept = prune(parse, 2, 0, kept, 0, 0)  # step 2 delete redundant nodes
    for column in (parse.idents, parse.sizes, parse.starts, parse.ends):
        del column[kept:]  # truncate every array to the kept node count
308
+
309
+
310
def prune(parse, step, i, j, k, depth):  # -> (i, k)
    """Copy the surviving nodes in read range i..j down to write index k onward.

    Nodes flagged as failed (ident bit 8) are skipped with their whole
    subtree.  In step 2 an EQ node whose single child spans the rest of its
    subtree is dropped and the child takes its place at the same depth.  A
    node left with size 1 becomes a TERM leaf unless its type is HEAD.

    Returns the final (read index, write index).
    """
    read, write = i, k
    while read < j:  # read: i..j ==> write: k..
        ident = parse.idents[read]
        span = parse.size(read)
        if ident & 8:  # failed node, skip its subtree
            read += span
            continue
        # capture the node before recursing, the recursion may overwrite it
        start = parse.starts[read]
        end = parse.ends[read]
        if span == 1:
            next_read, next_write = read + 1, write + 1
        else:
            redundant = (
                step == 2
                and (ident & 7) == EQ
                and read + 1 < j
                and span - 1 == parse.size(read + 1)
            )
            if redundant:
                read += 1
                continue
            next_read, next_write = prune(
                parse, step, read + 1, read + span, write + 1, depth + 1
            )
            span = next_write - write
        if span == 1 and (ident & 3) != HEAD:
            ident = (ident & 0xFFF8) | TERM  # leaf node
        parse.idents[write] = ident
        parse.starts[write] = start
        parse.ends[write] = end
        parse.sizes[write] = (span << 8) | (depth & 0xFF)
        write, read = next_write, next_read
    return (read, write)
339
+
340
+
341
+ # -- ptree json -----------------------------------------------------------------
342
+
343
+
344
def p_tree(parse: Parse, i, j):
    """Return nodes i..j as a list of [name, text] leaves and [name, children] parents."""
    nodes = []
    cursor = i
    while cursor < j:
        if parse.leaf(cursor):
            body = parse.text(cursor)
        else:
            body = p_tree(parse, cursor + 1, cursor + parse.size(cursor))
        nodes.append([parse.name(cursor), body])
        cursor += parse.size(cursor)  # step over this node and its subtree
    return nodes
353
+
354
+
355
+ # -- itree json -----------------------------------------------------------------
356
+
357
+
358
def i_tree(p: Parse, i, j):
    """Return nodes i..j as [name, start, end, args] lists; args is None for a leaf."""
    nodes = []
    cursor = i
    while cursor < j:
        span = p.size(cursor)
        if p.leaf(cursor):
            args = None
        else:
            args = i_tree(p, cursor + 1, cursor + span)
        nodes.append([p.name(cursor), p.starts[cursor], p.ends[cursor], args])
        cursor += span
    return nodes
366
+
367
+
368
+ # -- ptree line diagram --------------------------------------------------------
369
+
370
+
371
def show_tree(parse: Parse) -> str:
    """Return one line per node: depth bars, rule name, and the repr of leaf text."""

    def render(i):
        suffix = f" {repr(parse.text(i))}" if parse.leaf(i) else ""
        return f"{indent_bars(parse.depth(i))}{parse.name(i)}{suffix}"

    return "\n".join(render(i) for i in range(len(parse.ends)))
377
+
378
+
379
+ # -- debug dump of parse tree nodes --------------------------------------------
380
+
381
+
382
def dump_tree(parse: Parse, filter=1) -> None:
    """Print a debug table of the parse tree nodes, one line per node.

    Failed nodes are shown with a "!" before the rule name.  With filter == 1
    failed nodes that matched nothing (start == end) are left out.  Input text
    lying between the end of one printed node and the start of the next is
    appended to the earlier node's line after " -> ".
    """
    source = parse.input
    print("Node Size Span Tree Input...", end="")
    saw_failure = False  # flag any failed nodes (for an end note)
    pos = 0  # to fill in any anon text matched between nodes
    for i in range(len(parse.ends)):
        ident = parse.idents[i]
        name = parse.code.names[ident >> 4]
        fail = (ident & FAIL) != 0
        if fail:
            saw_failure = True  # just for a note at end of the dump output
        start, end = parse.starts[i], parse.ends[i]
        shape = parse.sizes[i]
        size, depth = shape >> 8, shape & 0xFF
        if fail:
            if filter == 1 and start == end:
                continue
            name = "!" + name
        gap = f" -> {source[pos:start]!r}" if pos < start else ""
        pos = end
        print(gap)  # appends '-> anon' to end of line for previous node
        # now for the node print out....
        init = f"{i:3} {size:3} {start:3}..{end}"
        value = f"{repr(source[start:end])}" if ident & 3 == TERM else ""
        report = f"{init:16} {indent_bars(depth)}{name} {value}"
        # truncate long lines...
        if end - start > 30:
            shown_end, etc = start + 30, "..."
        else:
            shown_end, etc = end, ""
        text = f"{source[start:shown_end]!r}{etc}"
        print(f"{report:70} {text}", end="")
        # next loop: print(gap) to append -> text at end of this line
    tail = ""
    if pos < parse.max_pos:  # final last node anon text...
        tail = f" -> {source[pos : parse.max_pos]!r}"
    print(tail)
    if filter == 1 and saw_failure:
        print(
            "Note: empty failures have been omitted (use parse.dump(0) to see everything)."
        )
426
+
427
+
428
+ # -- Parse error reporting ---------------------------------------------------
429
+
430
+
431
def show_pos(parse, info=""):
    """Return the source line at the failure position with a caret under it.

    The position is the larger of parse.pos and parse.max_pos.  The previous
    line is shown as well when the failing line does not start at index 0 or 1
    of the input.  `info` is printed after the caret.
    """
    text = parse.input
    pos = max(parse.pos, parse.max_pos)
    sol = line_start(parse, pos - 1)
    eol = line_end(parse, pos)
    ln = line_number(text, sol)
    left = f"line {ln} | {text[sol + 1 : pos]}"
    marker = f"{' ' * len(left)}^ {info}"  # caret sits under the failing column
    prior = ""  # show previous line...
    if sol > 0:
        prev_sol = line_start(parse, sol - 1)
        prior = f"line {ln - 1} | {text[prev_sol + 1 : sol]}\n"
    if pos == parse.end:
        return f"{prior}{left}\n{marker}"
    return f"{prior}{left}{text[pos]}{text[pos + 1 : eol]}\n{marker}"
444
+
445
+
446
def line_start(parse, sol):
    """Scan back from index `sol` and return the index of the nearest newline.

    Returns -1 when no newline is found; a negative `sol` is returned as is.
    """
    text = parse.input
    for index in range(sol, -1, -1):
        if text[index] == "\n":
            return index
    return min(sol, -1)
450
+
451
+
452
def line_end(parse, eol):
    """Scan forward from index `eol` and return the index of the next newline.

    Returns parse.end when no newline is found; an `eol` at or beyond
    parse.end is returned as is.
    """
    text = parse.input
    for index in range(eol, parse.end):
        if text[index] == "\n":
            return index
    return max(eol, parse.end)
456
+
457
+
458
def indent_bars(size):
    """Return `size` vertical-bar indent units wrapped in an ANSI colour and reset code."""
    colour_on = "\x1b[38;5;253m"
    colour_off = "\x1b[0m"
    bars = "\u2502 " * size
    return colour_on + bars + colour_off
463
+
464
+
465
def line_number(input, i):
    """Return the 1-based line number of the text that follows index `i`.

    The result is one more than the number of newlines in input[0..i]
    inclusive, so passing the index of the newline that ends line N gives
    N + 1.  A negative `i` gives 1, and an `i` past the end is clamped to the
    last index.

    Counting newlines directly avoids adding an extra line when the input has
    text before its earliest newline.
    """
    if i < 0:
        return 1
    last = len(input) - 1
    if i > last:
        i = last
    return input.count("\n", 0, i + 1) + 1
477
+
478
+
479
def rule_info(parse):
    """Return a short description of the rule involved in the peak failure.

    Gives "unexpected ending" when parse.top is after parse.first and the
    parse fell short (end_pos set).  Otherwise it names a rule, marked
    " expected" when that node matched nothing and " failed" otherwise, and
    passes it to src_map.
    """
    first, top = parse.first, parse.top
    if top > first and parse.end_pos > -1:
        return "unexpected ending"
    use_top = first < len(parse.ends) - 1 and top < first
    target = top if use_top else first
    name = parse.name(target)
    matched_nothing = parse.starts[target] == parse.ends[target]
    note = " expected" if matched_nothing else " failed"
    return src_map(parse, name, note)
491
+
492
+
493
def src_map(parse, name, note=""):
    """Return `name + note`, followed by the grammar text of each rule called `name`.

    When the code has no grammar parse (peg_parse is falsy) only the heading
    is returned, with " in boot-code..." appended.
    """
    peg_parse = parse.code.peg_parse
    heading = name + note
    if not peg_parse:
        return heading + " in boot-code..."
    # show grammar rule.... the node after a "rule" node holds the rule's name
    rule_texts = [
        f"{peg_parse.text(i)}"
        for i in range(len(peg_parse.ends))
        if peg_parse.name(i) == "rule" and peg_parse.text(i + 1) == name
    ]
    return "\n".join([heading, *rule_texts])
506
+
507
+
508
def err_report(parse):
    """Return the error report text for a failed parse.

    When the code object carries grammar errors the report lists them under a
    "grammar failed" title; otherwise it is a "parse failed" title followed by
    the failure position annotated with rule_info.
    """
    note = "... for more details use: parse.dump() ..."
    at_pos = f"at: {max(parse.pos, parse.max_pos)} of: {parse.end} {note}"
    code = parse.code
    if code and code.err:
        title = f"*** grammar failed {at_pos}"
        errs = "\n".join(code.err)
        return f"{title}\n{errs}\n{show_pos(parse)}"
    title = f"*** parse failed {at_pos}"
    where = show_pos(parse, rule_info(parse))
    return f"""{title}\n{where}"""
517
+
518
+
519
+ # == pPEG ptree is compiled into a Code object with instructions for parser ======================
520
+
521
+
522
class Code:
    """Compiled parser instructions built from a grammar ptree.

    `peg_parse` is the Parse of the grammar text (None for the boot compile),
    `extras` maps extension names to functions, and `boot` is a ready-made
    ptree used instead of `peg_parse.tree()` when given.
    """

    def __init__(self, peg_parse, extras=None, boot=None):
        self.peg_parse = peg_parse  # Parse of Peg grammar (None for boot)
        self.ptree = boot or peg_parse.tree()
        self.names = []  # rule name
        self.rules = []  # rule body expr
        self.codes = []  # compiled expr
        self.defs = []  # rule type, defn symbol
        # extension functions; always a dict so lookups by name cannot fail
        # on None when the caller supplied no extras
        self.extras = {} if extras is None else extras
        self.err = []
        self.ok = True
        self.compose()

    def compose(self):
        """Collect the rules, compile each body, and clear `ok` if errors were recorded."""
        names_defs_rules(self)
        self.codes = [emit(self, x) for x in self.rules]
        if self.err:
            self.ok = False

    def __str__(self):
        """Return one line per rule (index, name, defn symbol, code) or the error list."""
        if not self.ok:
            return f"code error: {self.err}"
        lines = [
            f"{i:2}: {rule} {DEFS[self.defs[i]]} {self.codes[i]}"
            for i, rule in enumerate(self.names)
        ]
        return "\n".join(lines)

    def parse(self, input):
        """Run this code over `input` and return the resulting Parse."""
        return parser(self, input)

    def errors(self):
        """Return the recorded error messages joined with newlines."""
        return "\n".join(self.err)
554
+
555
+
556
+ # -- rule types ------------------------------------------------------------------
557
+
558
# defn symbols, indexed by the rule type values below
DEFS = ["=", ":", ":=", "=:"]

EQ = 0  # "="  dynamic children: 0 => TERM, 1 => redundant, >1 => HEAD
ANON = 1  # ":"  rule name and results not in the parse tree
HEAD = 2  # ":=" parent node with any number of children
TERM = 3  # "=:" terminal leaf node text match

FAIL = 8  # flag bit, or-ed into a node ident
566
+
567
+ # -- compile Parse into Code parser instructions -----------------------------------
568
+
569
+
570
+ def names_defs_rules(code: Code) -> None:
571
+ for rule in code.ptree[1]:
572
+ match rule:
573
+ case ["rule", [["id", name], ["def", defn], expr]]:
574
+ code_rule_defs(code, name, defn, expr)
575
+ case ["rule", [["id", name], expr]]: # core peg grammar bootstrap
576
+ code_rule_defs(code, name, "=", expr)
577
+ case _:
578
+ code.err.append(f"Expected 'rule', is this a Peg ptree?\n {rule}")
579
+ break
580
+
581
+
582
def code_rule_defs(code, name, defn, expr):
    """Append one rule's name, body and type to `code`.

    A repeated name or a defn symbol missing from DEFS records an error (the
    rule is still appended; an unknown symbol gets type FAIL).  An "=" rule is
    typed ANON when its name starts with "_" and HEAD when it starts with an
    upper-case ASCII letter.
    """
    if name in code.names:
        code.err.append(f"duplicate rule name: {name}")
    code.names.append(name)
    code.rules.append(expr)
    if defn in DEFS:
        kind = DEFS.index(defn)
    else:
        kind = FAIL
        code.err.append(f"undefined: {name} {defn} ...")
    if kind == EQ:
        initial = name[0]
        if initial == "_":
            kind = ANON
        elif "A" <= initial <= "Z":
            kind = HEAD
    code.defs.append(kind)
598
+
599
+
600
+ def emit(code, expr):
601
+ match expr:
602
+ case ["id", name]:
603
+ try:
604
+ idx = code.names.index(name)
605
+ except ValueError:
606
+ code.err.append(f"undefined rule: {name}")
607
+ code_rule_defs(code, name, "=", ["extn", "<undefined>"])
608
+ code.codes.append(["extn", "<undefined>"])
609
+ return ["id", len(code.names) - 1]
610
+ return ["id", idx]
611
+ case ["alt", nodes]:
612
+ return ["alt", [emit(code, x) for x in nodes]]
613
+ case ["seq", nodes]:
614
+ return ["seq", [emit(code, x) for x in nodes]]
615
+ case ["rep", [exp, ["sfx", op]]]:
616
+ min = 0
617
+ max = 0
618
+ if op == "+":
619
+ min = 1
620
+ elif op == "?":
621
+ max = 1
622
+ return ["rept", min, max, emit(code, exp)]
623
+ case ["rep", [exp, ["min", min]]]:
624
+ min = int(min)
625
+ return ["rept", min, min, emit(code, exp)]
626
+ case ["rep", [exp, ["nums", [["min", min], ["max", max]]]]]:
627
+ min = int(min)
628
+ max = 0 if not max else int(max)
629
+ return ["rept", min, max, emit(code, exp)]
630
+ case ["pre", [["pfx", pfx], exp]]:
631
+ if pfx == "~":
632
+ return ["neg", emit(code, exp)]
633
+ return ["pred", pfx, emit(code, exp)]
634
+ case ["quote", str]:
635
+ if str[-1] != "i":
636
+ return ["quote", escape(str[1:-1], code), False]
637
+ return ["quote", escape(str[1:-2].upper(), code), True]
638
+ case ["class", str]:
639
+ return ["class", escape(str, code)]
640
+ case ["dot", _]:
641
+ return ["dot"]
642
+ case ["extn", _]:
643
+ return expr
644
+ case _:
645
+ raise Exception(f"*** crash: emit: undefined expression: {expr}")
646
+
647
+
648
def escape(s, code):
    r"""Return `s` with its backslash escape codes replaced by the characters they name.

    Handles \n \r \t and the hex forms \xHH \uHHHH \UHHHHHHHH.  A backslash
    before any other character, or at the very end, is kept as a literal.  A
    malformed hex escape records an error on `code` and returns `s` unchanged.
    """
    simple = {"n": "\n", "r": "\r", "t": "\t"}
    hex_widths = {"x": 2, "u": 4, "U": 8}
    out = []
    size = len(s)
    i = 0
    while i < size:
        c = s[i]
        i += 1
        if c == "\\" and i < size:
            k = s[i]
            if k in simple:
                c = simple[k]
                i += 1
            elif k in hex_widths:
                c, i = hex_value(hex_widths[k], s, i + 1)
                if c is None:
                    code.err.append(f"bad escape code: {s}")
                    return s
            # otherwise the backslash stays and k is read again on the next pass
        out.append(c)
    return "".join(out)
676
+
677
+
678
def hex_value(n, s, i):
    """Decode the `n` hex digits of `s` starting at index `i` into a character.

    Returns (character, i + n) on success and (None, i) when fewer than `n`
    characters remain, when any of them is not a hex digit, or when the value
    is not a valid code point.

    Digits are checked one by one because int(text, 16) also accepts signs,
    surrounding whitespace and underscores, and the code point range is
    checked because chr() raises for values above 0x10FFFF.
    """
    if i + n > len(s):
        return (None, i)
    digits = s[i : i + n]
    if not digits or any(ch not in "0123456789abcdefABCDEF" for ch in digits):
        return (None, i)
    code = int(digits, 16)
    if code > 0x10FFFF:
        return (None, i)
    return (chr(code), i + n)
686
+
687
+
688
+ # -- parse.transform -----------------------------------------------------------
689
+
690
+
691
def transformer(p: Parse, i, j, fns):
    """Build a value for each node in i..j, applying `fns[name]` where one exists.

    A leaf passes its matched text and a parent passes the value built from
    its children; a node with no function becomes [name, value].  A single
    resulting value is returned on its own, otherwise the list of values.
    """
    values = []
    at = i
    while at < j:
        name = p.name(at)
        fn = fns.get(name)
        if p.leaf(at):
            payload = p.text(at)
            at += 1
        else:
            stop = at + p.size(at)
            payload = transformer(p, at + 1, stop, fns)
            at = stop
        values.append(apply(name, fn, payload) if fn else [name, payload])
    return values[0] if len(values) == 1 else values
714
+
715
+
716
def apply(name, fn, args):
    """Return fn(args); any Exception it raises is re-raised as SystemExit.

    The SystemExit message shows the call as name(args) followed by the error.
    """
    try:
        return fn(args)
    except Exception as err:
        raise SystemExit(f"{name}({args})\n{err}")
723
+
724
+
725
+ # -- peg_grammar ptree -- bootstrap generated ---------------------------------------------------------
726
+
727
+ peg_ptree = ['Peg', [
728
+ ['rule', [['id', 'Peg'], ['def', '='], ['seq', [['id', '_'], ['rep', [['id', 'rule'], ['sfx', '+']]]]]]],
729
+ ['rule', [['id', 'rule'], ['def', '='], ['seq', [['id', 'id'], ['id', '_'], ['id', 'def'], ['id', '_'], ['id', 'alt']]]]],
730
+ ['rule', [['id', 'def'], ['def', '='], ['rep', [['class', '[:=]'], ['sfx', '+']]]]],
731
+ ['rule', [['id', 'alt'], ['def', '='], ['seq', [['id', 'seq'], ['rep', [['seq', [['quote', "'/'"], ['id', '_'], ['id', 'seq']]], ['sfx', '*']]]]]]],
732
+ ['rule', [['id', 'seq'], ['def', '='], ['rep', [['id', 'rep'], ['sfx', '+']]]]],
733
+ ['rule', [['id', 'rep'], ['def', '='], ['seq', [['id', 'pre'], ['rep', [['id', 'sfx'], ['sfx', '?']]], ['id', '_']]]]],
734
+ ['rule', [['id', 'pre'], ['def', '='], ['seq', [['rep', [['id', 'pfx'], ['sfx', '?']]], ['id', 'term']]]]],
735
+ ['rule', [['id', 'term'], ['def', '='], ['alt', [['id', 'call'], ['id', 'quote'], ['id', 'class'], ['id', 'dot'], ['id', 'group'], ['id', 'extn']]]]],
736
+ ['rule', [['id', 'group'], ['def', '='], ['seq', [['quote', "'('"], ['id', '_'], ['id', 'alt'], ['quote', "')'"]]]]],
737
+ ['rule', [['id', 'call'], ['def', '='], ['seq', [['id', 'id'], ['id', '_'], ['pre', [['pfx', '!'], ['id', 'def']]]]]]],
738
+ ['rule', [['id', 'id'], ['def', '='], ['seq', [['class', '[a-zA-Z_]'], ['rep', [['class', '[a-zA-Z0-9_]'], ['sfx', '*']]]]]]],
739
+ ['rule', [['id', 'pfx'], ['def', '='], ['class', '[~!&]']]],
740
+ ['rule', [['id', 'sfx'], ['def', '='], ['alt', [['class', '[+?]'], ['seq', [['quote', "'*'"], ['rep', [['id', 'nums'], ['sfx', '?']]]]]]]]],
741
+ ['rule', [['id', 'nums'], ['def', '='], ['seq', [['id', 'min'], ['rep', [['seq', [['quote', "'..'"], ['id', 'max']]], ['sfx', '?']]]]]]],
742
+ ['rule', [['id', 'min'], ['def', '='], ['rep', [['class', '[0-9]'], ['sfx', '+']]]]],
743
+ ['rule', [['id', 'max'], ['def', '='], ['rep', [['class', '[0-9]'], ['sfx', '*']]]]],
744
+ ['rule', [['id', 'quote'], ['def', '='], ['seq', [['class', "[']"], ['rep', [['pre', [['pfx', '~'], ['class', "[']"]]], ['sfx', '*']]], ['class', "[']"], ['rep', [['quote', "'i'"], ['sfx', '?']]]]]]],
745
+ ['rule', [['id', 'class'], ['def', '='], ['seq', [['quote', "'['"], ['rep', [['pre', [['pfx', '~'], ['quote', "']'"]]], ['sfx', '*']]], ['quote', "']'"]]]]],
746
+ ['rule', [['id', 'dot'], ['def', '='], ['seq', [['quote', "'.'"], ['id', '_']]]]],
747
+ ['rule', [['id', 'extn'], ['def', '='], ['seq', [['quote', "'<'"], ['rep', [['pre', [['pfx', '~'], ['quote', "'>'"]]], ['sfx', '*']]], ['quote', "'>'"]]]]],
748
+ ['rule', [['id', '_'], ['def', '='], ['rep', [['alt', [['rep', [['class', '[ \\t\\n\\r]'], ['sfx', '+']]], ['seq', [['quote', "'#'"], ['rep', [['pre', [['pfx', '~'], ['class', '[\\n\\r]']]], ['sfx', '*']]]]]]], ['sfx', '*']]]]]
749
+ ]] # fmt: skip
750
+
751
+ # == pPEG compile API =========================================================
752
+
753
+ peg_code = Code(None, {}, peg_ptree) # boot compile
754
+
755
+
756
def compile(grammar, extras=None) -> Code:
    """Parse the `grammar` text with the Peg code and return its compiled Code.

    `extras` is passed through to Code as the extension functions.  Raises
    SystemExit with a report when the grammar text does not parse or when
    compiling it records errors.
    """
    parse = parser(peg_code, grammar)
    if parse.ok:
        code = Code(parse, extras)
        if code.ok:
            return code
        raise SystemExit("*** grammar errors...\n" + code.errors())
    raise SystemExit("*** grammar fault...\n" + err_report(parse))
764
+
765
+
766
+ peg_code = compile(peg_grammar) # to improve grammar error reporting
pPEGpy/py.typed ADDED
File without changes
@@ -0,0 +1,99 @@
1
+ Metadata-Version: 2.4
2
+ Name: pPEGpy
3
+ Version: 0.3.2
4
+ Summary: pPEG -- portable PEG in Python
5
+ Author-email: Peter Cashin <cashin.peter@gmail.com>
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.12
8
+ Description-Content-Type: text/markdown
9
+
10
+ # pPEGpy
11
+
12
+ This is an implementation of [pPEG] in Python.
13
+
14
+ The package pPEGpy was created with: uv init --lib
15
+
16
+ from pPEGpy import peg
17
+
18
+ The peg.py file (in src/pPEGpy) is a module with no dependencies.
19
+
20
+ ## Example
21
+
22
+ ``` py
23
+ from pPEGpy import peg
24
+
25
+ # Equivalent to the regular expression for well-formed URI's in RFC 3986.
26
+
27
+ pURI = peg.compile("""
28
+ URI = (scheme ':')? ('//' auth)? path ('?' query)? ('#' frag)?
29
+ scheme = ~[:/?#]+
30
+ auth = ~[/?#]*
31
+ path = ~[?#]*
32
+ query = ~'#'*
33
+ frag = ~[ \t\n\r]*
34
+ """)
35
+
36
+ if not pURI.ok: raise Exception("URI grammar error: "+pURI.errors())
37
+
38
+ test = "http://www.ics.uci.edu/pub/ietf/uri/#Related";
39
+
40
+ uri = pURI.parse(test)
41
+
42
+ if uri.ok: print(uri.tree())
43
+ else: print(uri)
44
+
45
+ """
46
+ ["URI",[["scheme","http"],["auth","www.ics.uci.edu"],["path","/pub/ietf/uri/"],["frag","Related"]]]
47
+ """
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ The pPEG.py implementation is a single file with no dependencies.
53
+
54
+ Put a copy of the pPEG.py file into the same directory as your application, or use a PYTHONPATH shell environment variable for Python to load the pPEG.py module.
55
+
56
+ Not yet available for `pip` install.
57
+
58
+ Basic usage:
59
+
60
+ ``` py
61
+ import pPEG
62
+
63
+ my_parser = pPEG.compile("""... my grammar rules...""")
64
+
65
+ # For the grammar rules see the [pPEG] documentation, then:
66
+
67
+ my_parse = my_parser.parse("...input string...")
68
+
69
+ print(my_parse) # prints the ptree result or an error message
70
+ ```
71
+ Common usage:
72
+
73
+ ``` py
74
+ import pPEG
75
+
76
+ my_parser = pPEG.compile("""... my grammar rules...""")
77
+
78
+ if not my_parser.ok: raise Exception(my_parser.err)
79
+
80
+ # -- use my-parser in my application .......
81
+
82
+ my_parse = my_parser.parse('...input string...')
83
+
84
+ if not my_parse.ok:
85
+ print(my_parse)
86
+ .... handle parse failure ...
87
+ else:
88
+ process(my_parse.tree())
89
+ ```
90
+
91
+ The `ptree` parse tree type is JSON data, as defined in [pPEG].
92
+
93
+ ## Notes
94
+
95
+ The uv init --lib command made the project name lower case (ppegpy); I edited the name back to pPEGpy in several places (.toml, src/pPEGpy/).
96
+
97
+ ---
98
+
99
+ [pPEG]: https://github.com/pcanz/pPEG
@@ -0,0 +1,7 @@
1
+ pPEGpy/__init__.py,sha256=Rb0b2s0jtvFaDPZSAjRRHA3K0BC8jz6lgm_xEDUD6xY,52
2
+ pPEGpy/peg.py,sha256=8d5bWHFl-YME6KC12AM-_mqA1MdBTQzo1JHfOfTXLQU,25362
3
+ pPEGpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
+ ppegpy-0.3.2.dist-info/METADATA,sha256=61pr7V0TDSBn9YOi-ac8B-h8nu_zsZnN8ZTRvVHy854,2297
5
+ ppegpy-0.3.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ ppegpy-0.3.2.dist-info/licenses/LICENSE,sha256=Dk5HlE5DQI458fPxOm3WpT5m4MCFZ59LOzWjXNirrtY,1062
7
+ ppegpy-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2021 pcanz
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.