pPEGpy 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,77 @@
1
+ # A pPEG Parser Machine Developer Kit
2
+
3
+ This directory contains Python code samples for the incremental development of a pPEG parser machine, as explained in [pPEG-machine].
4
+
5
+ Each step has a code file that can be run as a Python script.
6
+
7
+ #### Step 1: [Machine-1]
8
+
9
+ First toy parser for date grammar example
10
+ No parse tree generation
11
+ 4-instruction parser machine
12
+ 50 LOC grammar and parser code
13
+ 50 LOC parser machine
14
+
15
+ #### Step 2: [Machine-2]
16
+
17
+ Toy parser for date grammar example
18
+ now generating a parse tree.
19
+ 4-instruction parser machine,
20
+ 60 LOC parser machine
21
+
22
+ #### Step 3: [Machine-3]
23
+
24
+ More instructions, date grammar example
25
+ 7-instruction parser machine,
26
+ 100 LOC parser machine
27
+
28
+ #### Step 4: [Machine-4]
29
+
30
+ Parser for pPEG boot grammar
31
+ 8-instruction parser machine,
32
+ 170 LOC parser machine
33
+
34
+ #### Step 5: [Machine-5]
35
+
36
+ Parser for full pPEG grammar,
37
+ 8-instruction parser machine,
38
+ parser_code from pPEG ptree,
39
+ export grammar compile API
40
+ 200 LOC parser machine
41
+
42
+ #### Step 6: [Machine-6]
43
+
44
+ Parser for full pPEG grammar,
45
+ 8-instruction parser machine,
46
+ parser_code from pPEG ptree,
47
+ export grammar compile API
48
+ 250 LOC parser machine
49
+
50
+ #### Step 7: [Machine-7]
51
+
52
+ Parser for full pPEG grammar,
53
+ 8-instruction parser machine,
54
+ parser_code from pPEG ptree,
55
+ export grammar compile API
56
+ 250 LOC parser machine
57
+
58
+ #### Full pPEG implementation: [Machine-8]
59
+
60
+ Full code for pPEG parser machine.
61
+ 50 LOC pPEG grammar source and JSON ptree
62
+ 250 LOC parser machine
63
+ 100 LOC compiler
64
+ 100 LOC fault and trace reporting
65
+ 120 LOC extensions
66
+ 700 LOC total
67
+
68
+ [pPEG-machine]: https://github.com/pcanz/pPEG/blob/master/docs/pPEG-machine.md
69
+
70
+ [Machine-1]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-1.py
71
+ [Machine-2]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-2.py
72
+ [Machine-3]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-3.py
73
+ [Machine-4]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-4.py
74
+ [Machine-5]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-5.py
75
+ [Machine-6]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-6.py
76
+ [Machine-7]: https://github.com/pcanz/pPEGpy/blob/master/DeveloperKit/machine-7.py
77
+ [Machine-8]: https://github.com/pcanz/pPEGpy/blob/master/pPEG.py
@@ -0,0 +1,114 @@
1
+ """
2
+ Step 1:
3
+ date grammar,
4
+ 4-instruction parser machine,
5
+ no parse tree generation.
6
+ """
7
+
8
# pPEG source text of the date grammar.  Kept for reference: nothing in
# the code shown in this script reads this string.  Rule `d` lists all
# ten digits so the text agrees with date_ptree and date_code below.
date_grammar = """
    date  = year '-' month '-' day
    year  = d d d d
    month = d d
    day   = d d
    d     = '0'/'1'/'2'/'3'/'4'/'5'/'6'/'7'/'8'/'9'
"""

# Parse tree form of the grammar: one ["rule", [["id", name], expr]]
# node per rule.  Also kept for reference only in this script.
date_ptree = ["Peg", [
    ["rule", [["id", "date"],
        ["seq", [["id", "year"], ["sq", "'-'"],
            ["id", "month"], ["sq", "'-'"], ["id", "day"]]]]],
    ["rule", [["id", "year"],
        ["seq", [["id", "d"], ["id", "d"],
            ["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "month"],
        ["seq", [["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "day"],
        ["seq", [["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "d"],
        ["alt", [["sq", "'0'"], ["sq", "'1'"], ["sq", "'2'"],
            ["sq", "'3'"], ["sq", "'4'"], ["sq", "'5'"], ["sq", "'6'"],
            ["sq", "'7'"], ["sq", "'8'"], ["sq", "'9'"]]]]],
]]

# Parser code: rule name -> expression to evaluate for that rule.
# "$start" is the expression that parse() evaluates first.
date_code = {
    "date":
        ["seq", [["id", "year"], ["sq", "'-'"],
            ["id", "month"], ["sq", "'-'"], ["id", "day"]]],
    "year":
        ["seq", [["id", "d"], ["id", "d"],
            ["id", "d"], ["id", "d"]]],
    "month":
        ["seq", [["id", "d"], ["id", "d"]]],
    "day":
        ["seq", [["id", "d"], ["id", "d"]]],
    "d":
        ["alt", [["sq", "'0'"], ["sq", "'1'"], ["sq", "'2'"],
            ["sq", "'3'"], ["sq", "'4'"], ["sq", "'5'"], ["sq", "'6'"],
            ["sq", "'7'"], ["sq", "'8'"], ["sq", "'9'"]]],
    "$start": ["id", "date"],
}
50
+
51
class Env:
    """Mutable state for one parse run.

    Attributes:
        code:  mapping of rule name -> expression (the parser code).
        input: the string being parsed.
        pos:   current index into ``input``; starts at 0.
        end:   ``len(input)``, cached for end-of-input checks.
    """

    def __init__(self, code, input):
        self.code = code
        self.input = input
        self.pos = 0
        self.end = len(input)
57
+
58
def parse(code, input):
    """Run the parser code against ``input``.

    Evaluates ``code["$start"]`` in a fresh Env and returns the pair
    ``(result, pos)``: the value returned by the start expression and the
    input position reached.  No check is made here that the whole input
    was consumed; callers can compare ``pos`` with ``len(input)``.
    """
    env = Env(code, input)
    result = eval(code["$start"], env)
    return (result, env.pos)
62
+
63
def id(exp, env):
    """Evaluate a rule reference ``["id", name]``.

    Looks up ``name`` in ``env.code`` and evaluates that rule's
    expression, returning its result.  Raises KeyError if the rule name
    is not defined in the code.

    The function name intentionally matches the "id" instruction name
    used in the dispatch table, so it shadows the builtin ``id`` here.
    """
    name = exp[1]
    expr = env.code[name]
    return eval(expr, env)
67
+
68
def seq(exp, env):
    """Evaluate a sequence ``["seq", [e1, e2, ...]]``.

    Every sub-expression must match in order.  On the first failure the
    input position is put back to where the sequence started and False
    is returned; otherwise True.
    """
    start = env.pos
    for arg in exp[1]:
        if not eval(arg, env):
            env.pos = start  # backtrack: undo any partial match
            return False
    return True
75
+
76
def alt(exp, env):
    """Evaluate an ordered choice ``["alt", [e1, e2, ...]]``.

    Alternatives are tried in order and the first one that matches wins.
    After each failed alternative the input position is reset to where
    the choice started.  Returns False if no alternative matches.
    """
    start = env.pos
    for arg in exp[1]:
        if eval(arg, env):
            return True
        env.pos = start  # reset before trying the next alternative
    return False
83
+
84
def sq(exp, env):
    """Match a single-quoted literal ``["sq", "'text'"]``.

    The surrounding quote marks stored in ``exp[1]`` are skipped.  On a
    match ``env.pos`` advances past the literal and True is returned.  On
    failure (mismatch or not enough input left) False is returned and
    ``env.pos`` is left unchanged, so a partially matching multi-character
    literal never consumes input.
    """
    literal = exp[1][1:-1]  # drop the quoted quote marks
    # startswith with explicit bounds also covers the end-of-input check.
    if not env.input.startswith(literal, env.pos, env.end):
        return False
    env.pos += len(literal)
    return True
90
+
91
# Dispatch table: instruction name (exp[0]) -> handler function.
instruct = {
    "id": id,
    "seq": seq,
    "alt": alt,
    "sq": sq,
}


def eval(exp, env):
    """Evaluate one parser expression by dispatching on its tag.

    ``exp[0]`` names the instruction; the matching handler from
    ``instruct`` is called with the whole expression and the Env, and its
    result is returned.  Raises KeyError for an unknown instruction.

    Each call prints the expression as a simple execution trace.  The
    name deliberately shadows the builtin ``eval`` within this script.
    """
    print(exp, exp[0])
    return instruct[exp[0]](exp, env)


print(parse(date_code, "2021-03-04"))  # eval exp ...
105
+
106
+ """ Implementation Notes:
107
+
108
+ seq and alt reset the current pos after a failure
109
+
110
+ sq needs to check for end of input
111
+
112
+ sq needs to skip the quoted quote marks
113
+
114
+ """
@@ -0,0 +1,132 @@
1
+
2
+ """
3
+ Step 2:
4
+ date grammar,
5
+ 4-instruction parser machine,
6
+ generating a parse tree.
7
+ """
8
+
9
# pPEG source text of the date grammar.  Kept for reference: nothing in
# the code shown in this script reads this string.  Rule `d` lists all
# ten digits so the text agrees with date_ptree and date_code below.
date_grammar = """
    date  = year '-' month '-' day
    year  = d d d d
    month = d d
    day   = d d
    d     = '0'/'1'/'2'/'3'/'4'/'5'/'6'/'7'/'8'/'9'
"""

# Parse tree form of the grammar: one ["rule", [["id", name], expr]]
# node per rule.  Also kept for reference only in this script.
date_ptree = ["Peg", [
    ["rule", [["id", "date"],
        ["seq", [["id", "year"], ["sq", "'-'"],
            ["id", "month"], ["sq", "'-'"], ["id", "day"]]]]],
    ["rule", [["id", "year"],
        ["seq", [["id", "d"], ["id", "d"],
            ["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "month"],
        ["seq", [["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "day"],
        ["seq", [["id", "d"], ["id", "d"]]]]],
    ["rule", [["id", "d"],
        ["alt", [["sq", "'0'"], ["sq", "'1'"], ["sq", "'2'"],
            ["sq", "'3'"], ["sq", "'4'"], ["sq", "'5'"], ["sq", "'6'"],
            ["sq", "'7'"], ["sq", "'8'"], ["sq", "'9'"]]]]],
]]

# Parser code: rule name -> expression to evaluate for that rule.
# "$start" is the expression that parse() evaluates first.
date_code = {
    "date":
        ["seq", [["id", "year"], ["sq", "'-'"],
            ["id", "month"], ["sq", "'-'"], ["id", "day"]]],
    "year":
        ["seq", [["id", "d"], ["id", "d"],
            ["id", "d"], ["id", "d"]]],
    "month":
        ["seq", [["id", "d"], ["id", "d"]]],
    "day":
        ["seq", [["id", "d"], ["id", "d"]]],
    "d":
        ["alt", [["sq", "'0'"], ["sq", "'1'"], ["sq", "'2'"],
            ["sq", "'3'"], ["sq", "'4'"], ["sq", "'5'"], ["sq", "'6'"],
            ["sq", "'7'"], ["sq", "'8'"], ["sq", "'9'"]]],
    "$start": ["id", "date"],
}
51
+
52
class Env:
    """Mutable state for one parse run.

    Attributes:
        code:  mapping of rule name -> expression (the parser code).
        input: the string being parsed.
        pos:   current index into ``input``; starts at 0.
        end:   ``len(input)``, cached for end-of-input checks.
        tree:  list used as a stack of parse tree nodes built so far.
    """

    def __init__(self, code, input):
        self.code = code
        self.input = input
        self.pos = 0
        self.end = len(input)
        self.tree = []  # build parse tree
59
+
60
def parse(code, input):
    """Run the parser code against ``input``.

    Evaluates ``code["$start"]`` in a fresh Env and returns the triple
    ``(result, pos, tree)``: the value returned by the start expression,
    the input position reached, and the parse tree node list built.  No
    check is made here that the whole input was consumed.
    """
    env = Env(code, input)
    result = eval(code["$start"], env)
    return (result, env.pos, env.tree)
64
+
65
def id(exp, env):
    """Evaluate a rule reference ``["id", name]`` and build its tree node.

    The rule's expression is looked up in ``env.code`` and evaluated.  On
    failure False is returned.  On success the nodes the rule pushed onto
    ``env.tree`` decide the result node:

    * more than one new node: they are wrapped as ``[name, [nodes...]]``;
    * no new node: a leaf ``[name, matched_text]`` is appended;
    * exactly one new node: it is left as is (the rule name is elided).

    The function name intentionally matches the "id" instruction name
    used in the dispatch table, so it shadows the builtin ``id`` here.
    """
    name = exp[1]
    start = env.pos
    stack = len(env.tree)  # tree height before this rule ran
    expr = env.code[name]
    if not eval(expr, env):
        return False
    size = len(env.tree)
    if size - stack > 1:
        # Several children: replace them with one node for this rule.
        env.tree[stack:] = [[name, env.tree[stack:]]]
    elif size == stack:
        # No children: this rule is a leaf holding the matched text.
        env.tree.append([name, env.input[start:env.pos]])
    return True  # a single child is kept: elide redundant rule name
81
+
82
def seq(exp, env):
    """Evaluate a sequence ``["seq", [e1, e2, ...]]``.

    Every sub-expression must match in order.  On the first failure any
    tree nodes added by earlier sub-expressions are dropped, the input
    position is put back to where the sequence started, and False is
    returned; otherwise True.
    """
    start = env.pos
    stack = len(env.tree)
    for arg in exp[1]:
        if not eval(arg, env):
            if len(env.tree) > stack:
                env.tree = env.tree[0:stack]  # discard partial results
            env.pos = start
            return False
    return True
92
+
93
def alt(exp, env):
    """Evaluate an ordered choice ``["alt", [e1, e2, ...]]``.

    Alternatives are tried in order and the first one that matches wins.
    After each failed alternative the tree and the input position are
    reset to their state at the start of the choice.  Returns False if
    no alternative matches.
    """
    start = env.pos
    stack = len(env.tree)
    for arg in exp[1]:
        if eval(arg, env):
            return True
        if len(env.tree) > stack:
            env.tree = env.tree[0:stack]  # discard the failed attempt's nodes
        env.pos = start
    return False
103
+
104
def sq(exp, env):
    """Match a single-quoted literal ``["sq", "'text'"]``.

    The surrounding quote marks stored in ``exp[1]`` are skipped.  On a
    match ``env.pos`` advances past the literal and True is returned.  On
    failure (mismatch or not enough input left) False is returned and
    ``env.pos`` is left unchanged, so a partially matching multi-character
    literal never consumes input.
    """
    literal = exp[1][1:-1]  # drop the quoted quote marks
    # startswith with explicit bounds also covers the end-of-input check.
    if not env.input.startswith(literal, env.pos, env.end):
        return False
    env.pos += len(literal)
    return True
110
+
111
# Dispatch table: instruction name (exp[0]) -> handler function.
instruct = {
    "id": id,
    "seq": seq,
    "alt": alt,
    "sq": sq,
}


def eval(exp, env):
    """Evaluate one parser expression by dispatching on its tag.

    ``exp[0]`` names the instruction; the matching handler from
    ``instruct`` is called with the whole expression and the Env, and its
    result is returned.  Raises KeyError for an unknown instruction.

    Each call prints the expression as a simple execution trace.  The
    name deliberately shadows the builtin ``eval`` within this script.
    """
    print(exp, exp[0])
    return instruct[exp[0]](exp, env)


print(parse(date_code, "2021-03-04"))  # eval exp ...
123
+
124
+ """ Implementation Notes:
125
+
126
+ Add parse tree building in id rule.
127
+
128
+ Add reset tree in seq and alt
129
+
130
+ TODO: upper case rule names and anon underscore rule names.
131
+
132
+ """
@@ -0,0 +1,149 @@
1
+ """
2
+ Step 3:
3
+ date grammar,
4
+ 7-instruction parser machine,
5
+ """
6
+
7
# pPEG source text of the date grammar.  Kept for reference: nothing in
# the code shown in this script reads this string.
date_grammar = """
    date  = year '-' month '-' day
    year  = [0-9]+
    month = [0-9]+
    day   = [0-9]+
"""

# Parser code: rule name -> expression to evaluate for that rule.
# "$start" is the expression that parse() evaluates first.
date_code = {
    "date":
        ["seq", [["id", "year"], ["sq", "'-'"],
            ["id", "month"], ["sq", "'-'"], ["id", "day"]]],
    "year":
        ["rep", [["chs", "[0-9]"], ["sfx", "+"]]],
    "month":
        ["rep", [["chs", "[0-9]"], ["sfx", "+"]]],
    "day":
        ["rep", [["chs", "[0-9]"], ["sfx", "+"]]],
    "$start":
        ["id", "date"],
}
27
+
28
class Env:
    """Mutable state for one parse run.

    Attributes:
        code:  mapping of rule name -> expression (the parser code).
        input: the string being parsed.
        pos:   current index into ``input``; starts at 0.
        end:   ``len(input)``, cached for end-of-input checks.
        tree:  list used as a stack of parse tree nodes built so far.
    """

    def __init__(self, code, input):
        self.code = code
        self.input = input
        self.pos = 0
        self.end = len(input)
        self.tree = []  # build parse tree
35
+
36
def parse(code, input):
    """Run the parser code against ``input``.

    Evaluates ``code["$start"]`` in a fresh Env and returns the triple
    ``(result, pos, tree)``: the value returned by the start expression,
    the input position reached, and the parse tree node list built.  No
    check is made here that the whole input was consumed.
    """
    env = Env(code, input)
    result = eval(code["$start"], env)
    return (result, env.pos, env.tree)
40
+
41
def id(exp, env):
    """Evaluate a rule reference ``["id", name]`` and build its tree node.

    The rule's expression is looked up in ``env.code`` and evaluated.  On
    failure False is returned.  On success the nodes the rule pushed onto
    ``env.tree`` decide the result node:

    * more than one new node: they are wrapped as ``[name, [nodes...]]``;
    * no new node: a leaf ``[name, matched_text]`` is appended;
    * exactly one new node: it is left as is (the rule name is elided).

    The function name intentionally matches the "id" instruction name
    used in the dispatch table, so it shadows the builtin ``id`` here.
    """
    name = exp[1]
    start = env.pos
    stack = len(env.tree)  # tree height before this rule ran
    expr = env.code[name]
    if not eval(expr, env):
        return False
    size = len(env.tree)
    if size - stack > 1:
        # Several children: replace them with one node for this rule.
        env.tree[stack:] = [[name, env.tree[stack:]]]
    elif size == stack:
        # No children: this rule is a leaf holding the matched text.
        env.tree.append([name, env.input[start:env.pos]])
    return True  # a single child is kept: elide redundant rule name
57
+
58
def seq(exp, env):
    """Evaluate a sequence ``["seq", [e1, e2, ...]]``.

    Every sub-expression must match in order.  On the first failure any
    tree nodes added by earlier sub-expressions are dropped, the input
    position is put back to where the sequence started, and False is
    returned; otherwise True.
    """
    start = env.pos
    stack = len(env.tree)
    for arg in exp[1]:
        if not eval(arg, env):
            if len(env.tree) > stack:
                env.tree = env.tree[0:stack]  # discard partial results
            env.pos = start
            return False
    return True
68
+
69
def alt(exp, env):
    """Evaluate an ordered choice ``["alt", [e1, e2, ...]]``.

    Alternatives are tried in order and the first one that matches wins.
    After each failed alternative the tree and the input position are
    reset to their state at the start of the choice.  Returns False if
    no alternative matches.
    """
    start = env.pos
    stack = len(env.tree)
    for arg in exp[1]:
        if eval(arg, env):
            return True
        if len(env.tree) > stack:
            env.tree = env.tree[0:stack]  # discard the failed attempt's nodes
        env.pos = start
    return False
78
+
79
def rep(exp, env):
    """Evaluate a repetition ``["rep", [expr, ["sfx", sfx]]]``.

    ``sfx`` selects the bounds: ``"+"`` requires at least one match,
    ``"?"`` allows at most one, and anything else (``"*"``) is zero or
    more.  ``expr`` is matched repeatedly until it fails, makes no
    progress, or the upper bound is reached.  Returns True when the
    number of matches reaches the lower bound.

    A failed attempt is undone before the loop stops: handlers such as
    ``sq`` can leave ``env.pos`` advanced after a partial match, and that
    input must not count as consumed by the repetition.
    """
    _rep, (expr, (_sfx, sfx)) = exp
    least = 1 if sfx == "+" else 0
    most = 1 if sfx == "?" else 0  # 0 means no upper limit
    count = 0
    while True:
        start = env.pos
        stack = len(env.tree)
        if not eval(expr, env):
            # Backtrack the failed attempt, as seq and alt do.
            if len(env.tree) > stack:
                env.tree = env.tree[0:stack]
            env.pos = start
            break
        if env.pos == start:
            break  # no progress: stop rather than loop forever
        count += 1
        if count == most:
            break
    return count >= least
94
+
95
def sq(exp, env):
    """Match a single-quoted literal ``["sq", "'text'"]``.

    The surrounding quote marks stored in ``exp[1]`` are skipped.  On a
    match ``env.pos`` advances past the literal and True is returned.  On
    failure (mismatch or not enough input left) False is returned and
    ``env.pos`` is left unchanged, so a partially matching multi-character
    literal never consumes input.
    """
    literal = exp[1][1:-1]  # drop the quoted quote marks
    # startswith with explicit bounds also covers the end-of-input check.
    if not env.input.startswith(literal, env.pos, env.end):
        return False
    env.pos += len(literal)
    return True
101
+
102
def dq(exp, env):
    """Match a double-quoted literal ``["dq", '"text"']``.

    Works like a plain literal match except that each space character in
    the literal matches any run (possibly empty) of input characters that
    compare ``<= " "``.  The surrounding quote marks in ``exp[1]`` are
    skipped.

    On success ``env.pos`` advances past the matched text and True is
    returned.  On failure False is returned and ``env.pos`` is left
    unchanged, so a partial match never consumes input.
    """
    text, end = env.input, env.end
    pos = env.pos  # work on a local cursor; commit only on success
    for c in exp[1][1:-1]:
        if c == " ":
            while pos < end and text[pos] <= " ":
                pos += 1
            continue
        if pos >= end or c != text[pos]:
            return False
        pos += 1
    env.pos = pos
    return True
110
+
111
def chs(exp, env):
    """Match one input character against a set ``["chs", "[...]"]``.

    The set text in ``exp[1]`` includes its enclosing brackets.  Inside
    them, ``x-y`` denotes an inclusive character range and any other
    character stands for itself.  A ``-`` is only treated as a range
    operator when a character other than the closing bracket follows it.

    Returns True and advances ``env.pos`` by one when the current input
    character is in the set; returns False without moving at end of
    input or when nothing in the set matches.
    """
    if env.pos >= env.end:
        return False
    spec = exp[1]
    last = len(spec) - 1  # index of the closing "]"
    ch = env.input[env.pos]
    i = 1  # skip the opening "["
    while i < last:
        if i + 2 < last and spec[i + 1] == "-":
            matched = spec[i] <= ch <= spec[i + 2]
            step = 3
        else:
            matched = ch == spec[i]
            step = 1
        if matched:
            env.pos += 1
            return True
        i += step
    return False
128
+
129
# Dispatch table: instruction name (exp[0]) -> handler function.
instruct = {
    "id": id,
    "seq": seq,
    "alt": alt,
    "rep": rep,
    "sq": sq,
    "dq": dq,
    "chs": chs,
}


def eval(exp, env):
    """Evaluate one parser expression by dispatching on its tag.

    ``exp[0]`` names the instruction; the matching handler from
    ``instruct`` is called with the whole expression and the Env, and its
    result is returned.  Raises KeyError for an unknown instruction.

    Each call prints the expression as a simple execution trace.  The
    name deliberately shadows the builtin ``eval`` within this script.
    """
    print(exp, exp[0])
    return instruct[exp[0]](exp, env)


print(parse(date_code, "2021-03-04"))  # eval exp ...
144
+
145
+ """ Implementation Notes:
146
+
147
+ Adds rep, dq, and chs instructions
148
+
149
+ """