oo_peg 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../parsers'
4
+ require_relative 'advanced/operator_parser'
5
+ require_relative 'advanced/sexp_parser'
6
+ require_relative 'advanced/string_parser'
7
+ require_relative 'advanced/symbol_parser'
8
+
9
+ module OOPeg
10
+ module Parsers
11
+ ##
12
+ #
13
+ # == The String Parser
14
+ #
15
+ # By default it parses strings that start with a +'+ or +"+, does not
16
+ # support interpolation and allows escaping the delimiting quotes
17
+ # with a backslash.
18
+ #
19
+ # # example: default StringParser
20
+ #
21
+ # parse(string_parser, '"Hello"').ast => 'Hello'
22
+ # parse(string_parser, %{"'Hello'"}).ast => "'Hello'"
23
+ # parse(string_parser, %{'"Hello"'}).ast => '"Hello"'
24
+ #
25
+ # Quotes can be included by escaping them with backslashes
26
+ #
27
+ # # example: escaping quotes
28
+ #
29
+ # parse(string_parser, '"Hello\\"World\\""and more').ast => 'Hello"World"'
30
+ #
31
+ # Escaping quotes by doubling them is not the default behavior...
32
+ #
33
+ # # example: no double escaping by default
34
+ #
35
+ # parse(string_parser, '"Hello""World"""and more').ast => 'Hello'
36
+ #
37
+ # ... but can be enabled with the +doubled_escape+ flag
38
+ #
39
+ # # example: double escaping enabled
40
+ #
41
+ # parse(string_parser(doubled_escape: true), '"Hello""World"""and more').ast => 'Hello"World"'
42
+ #
43
+ # == The Symbol Parser
44
+ #
45
+ # is nothing more than a parser that parses a sequence of a prefix character (defaulting to +:+)
46
+ # followed by a sequence of characters parsed by the +id_parser+.
47
+ #
48
+ # Therefore all configurations of the +id_parser+ apply also to the +symbol_parser+.
49
+ #
50
+ # However, the returned ast is not a +String+, but a +Symbol+
51
+ #
52
+ # # example: the default symbol parser
53
+ #
54
+ # parse(symbol_parser, ':hello ').ast => :hello
55
+ #
56
+ # == Putting it all together: The SexpParser
57
+ #
58
+ # with all the above and all the other predefined parsers it is now very easy
59
+ # to expose an +sexp_parser+
60
+ #
61
+ # It is configured by the following parameters
62
+ #
63
+ # - the parentheses pairs which default to <tt>(), {}</tt> and <tt>[]</tt>.
64
+ # - the +head_parser+ which defaults to the +tail_parser+ but can be
65
+ # set to a different parser. It defines how the first element of a sexp can be parsed.
66
+ # - the +tail_parser+ which defaults to a selection of +string_parser+, +int_parser+,
67
+ # +symbol_parser+, +operator_parser+ and, of course, recursively +sexp_parser+
68
+ # - the +seperation_parser+ which defaults to the +ws_parser+
69
+ #
70
+ # So basically the grammar which is parsed is like the following
71
+ #
72
+ # s_exp ::= "(" inner ")" | "{" inner "}" | "[" inner "]"
73
+ # inner ::= [separator? head [separator tail]... separator?]?
74
+ # head ::= tail ; unless configured differently
75
+ # tail ::= string | symbol | integer | operator | s_exp
76
+ #
77
+ # Here is a quite long...
78
+ #
79
+ # # example: An s-expression
80
+ #
81
+ # s_expression = <<-EOS
82
+ # (fn stupid [a]
83
+ # (let [x 42 y (succ x) z (+ x y)]
84
+ # (if (> a z)
85
+ # (- a z)
86
+ # (error 'a too small'))
87
+ # ))
88
+ # EOS
89
+ #
90
+ # expected =
91
+ # [:sexp,
92
+ # [[:id, "fn"], [:id, "stupid"], [[:arr, [[:id, "a"]]]],
93
+ # [[:sexp,
94
+ # [[:id, "let"],
95
+ # [[:arr, [[:id, "x"], [:int, 42], [:id, "y"], [[:sexp, [[:id, "succ"], [:id, "x"]]]], [:id, "z"], [[:sexp, [[:op, "+"], [:id, "x"], [:id, "y"]]]]]]],
96
+ # [[:sexp,
97
+ # [[:id, "if"], [[:sexp, [[:op, ">"], [:id, "a"], [:id, "z"]]]],
98
+ # [[:sexp, [[:op, "-"], [:id, "a"], [:id, "z"]]]],
99
+ # [[:sexp, [[:id, "error"], [:str, "a too small"]]]]]]]]]]]]
100
+ #
101
+ # ast = parse(sexp_parser.map(&:first), s_expression).ast
102
+ #
103
+ # ast => ^expected
104
+ #
105
module Advanced

  # Factory for the operator parser; all keyword arguments are handed
  # through unchanged to +OperatorParser.make+.
  def operator_parser(**kwds)
    OperatorParser.make(**kwds)
  end

  # Factory for the s-expression parser; all keyword arguments are handed
  # through unchanged to +SexpParser.make+.
  def sexp_parser(**kwds)
    SexpParser.make(**kwds)
  end

  # Factory for the string parser. The keyword defaults declared here
  # (quote delimiters, backslash escape, parser name) are forwarded
  # explicitly to +StringParser.make+.
  def string_parser(delim: %{'"}, doubled_escape: nil, extra_parser: nil, escape_with: "\\", name: "StringParser")
    StringParser.make(delim:, doubled_escape:, extra_parser:, escape_with:, name:)
  end

  # Factory for the symbol parser. The keyword defaults declared here
  # (prefix and the two character classes) are forwarded explicitly to
  # +SymbolParser.make+.
  def symbol_parser(prefix: %{:}, inner_class: [:alnum, '_'], lead_class: :alpha, name: nil)
    SymbolParser.make(prefix:, inner_class:, lead_class:, name:)
  end
end
115
+ end
116
+ end
117
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,252 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'true_set'
4
+ require_relative '../parser'
5
+ module OOPeg
6
+ module Parsers
7
+ ##
8
+ # +BaseParsers+
9
+ #
10
+ # Implementation of all parsers that do not rely on other parsers or combinators
11
+ # ===== +char_parser+
12
+ #
13
+ # Parser that parses a set of characters or any character
14
+ #
15
+ # # example: parse any char
16
+ #
17
+ # parser = char_parser
18
+ #
19
+ # parse(char_parser, 'x').ast => 'x'
20
+ # parse(char_parser, 'xa').ast => 'x'
21
+ # parse(char_parser, '').ast => nil
22
+ #
23
+ # # example: parse just one char
24
+ #
25
+ # parser = char_parser('a')
26
+ #
27
+ # parse(parser, 'a').ast => 'a'
28
+ # parse(parser, 'b').ast => nil
29
+ #
30
+ # # example: parse a set of chars
31
+ #
32
+ # parser = char_parser('ab')
33
+ #
34
+ # parse(parser, 'a').ast => 'a'
35
+ # parse(parser, 'b').ast => 'b'
36
+ # parse(parser, 'c').ast => nil
37
+ #
38
+ # Sometimes we want to parse the complementary set of characters, then we can pass
39
+ # in <tt>negate: true</tt>
40
+ #
41
+ # # example: parse complement
42
+ #
43
+ # consonne = char_parser('aeiouy', negate: true)
44
+ #
45
+ # parse(consonne, 'x').ast => 'x'
46
+ # parse(consonne, 'i') not! ok
47
+ #
48
+ # ===== +char_class_parser+
49
+ #
50
+ # Parses a Regexp character class
51
+ #
52
+ # # example: parse digits
53
+ #
54
+ # digit_parser = char_class_parser(:digit)
55
+ #
56
+ # parse(digit_parser, '9').ast => '9'
57
+ # parse(digit_parser, 'a').ast => nil
58
+ #
59
+ # But we can parse a union of character classes
60
+ #
61
+ # # example: parse digits and lower case letters
62
+ #
63
+ # parser = char_class_parser(:digit, :lower)
64
+ #
65
+ # parse(parser, '8').ast => '8'
66
+ # parse(parser, 'a8').ast => 'a'
67
+ # parse(parser, 'é').ast => 'é'
68
+ # parse(parser, 'A8').ast => nil
69
+ #
70
+ # The available character classes are defined here: https://ruby-doc.org/3.4.1/Regexp.html#class-Regexp-label-POSIX+Bracket+Expressions
71
+ #
72
+ # ==== _Pseudo_ _Parsers_
73
+ #
74
+ # These are useful parsers in the context of combinators, but they must be
75
+ # used with care as they do not advance the input.
76
+ #
77
+ # This holds also for the +many+ combinator unless a <tt>min: >0</tt> argument is
78
+ # provided.
79
+ #
80
+ # ===== +end_parser+
81
+ #
82
+ # Only parses an empty string, ast is always nil.
83
+ #
84
+ # # example: end_parser
85
+ #
86
+ # parse(end_parser, "") is! ok
87
+ # parse(end_parser, "a") not! ok
88
+ #
89
+ # Its major use case is to assure that the whole input has been parsed, in other words, that
90
+ # there are no spurious characters at the end
91
+ #
92
+ # # example: end_parser, useful after all
93
+ #
94
+ # parser = int_parser.and(end_parser).map(&:first)
95
+ #
96
+ # parse(parser, "1905").ast => 1905
97
+ # parse(parser, "1905oh") not! ok
98
+ #
99
+ # ===== +true_parser+
100
+ #
101
+ # It always succeeds, but does not advance the input, this can be useful in some
102
+ # complex combinators we will show below
103
+ # OOPeg@Some+Complex+Combinators
104
+ #
105
+ # # example: true parser
106
+ #
107
+ # parse(true_parser, "hello") is! ok
108
+ # # But it does not advance
109
+ # parse(true_parser, "hello").input.content => %w[h e l l o]
110
+ #
111
+ # Very originally there is also the...
112
+ #
113
+ # ===== +false_parser+
114
+ #
115
+ # ... which always fails
116
+ #
117
+ # # example: false_parser
118
+ #
119
+ # # parse(false_parser, "hello") not! ok
120
+ # parse(false_parser, "hello").input.content => %w[h e l l o]
121
+ #
122
+ module BaseParsers
123
+
124
# Builds a parser for one character that belongs to any of the given
# +char_classes+.
#
# A single class is handled by the dedicated one-class builder, anything
# else (none or several classes) by the union builder.
def char_class_parser(*char_classes, name: nil)
  return _1_char_class_parser(char_classes.first, name:) if char_classes.size == 1

  _char_classes_parser(*char_classes, name:)
end
133
+
134
# Builds a parser that consumes exactly one character of the input.
#
# set::    a String whose grapheme clusters form the set of characters, or
#          +nil+ to use the set that contains every character
# name::   optional parser name, defaults to <tt>"char_parser(<members>)"</tt>
# negate:: when truthy the parser accepts only characters that are *not*
#          members of +set+
#
# On success the ast is the consumed character and the input is advanced by
# one position. On failure (end of input, or a character that is not
# acceptable) the result carries the unchanged input and an error message.
def char_parser(set=nil, name: nil, negate: false)
  set = mk_set(set)
  name ||= "char_parser(#{set.to_a.join})"
  Parser.new(name) do |input|
    case input.content
    in []
      Result.nok(error: "unexpected end of input", input:, parser_name: name)
    in [h, *]
      member = set.member?(h)
      accepted = negate ? !member : member
      if accepted
        Result.ok(ast: h, input: input.advance)
      else
        # A negated parser rejects a character because it IS in the set, so
        # the message has to say so instead of claiming the opposite.
        error =
          if negate
            "#{h} is member of the excluded set #{set}"
          else
            "#{h} is not member of the required set #{set}"
          end
        Result.nok(input:, error:, parser_name: name)
      end
    end
  end
end
151
+
152
+ # Pseudo Parsers
153
# Builds a parser that succeeds only when no input is left.
#
# The successful result has a +nil+ ast and carries the input it was given;
# any remaining content yields a failure reporting "not at end of input".
def end_parser(name: nil)
  parser_name = name || "end_parser"
  Parser.new(parser_name) do |input|
    case input.content
    in [] then Result.ok(ast: nil, input:)
    else Result.nok(input:, error: "not at end of input", parser_name:)
    end
  end
end
164
+
165
# Builds a parser that fails for every input; the failure result carries
# the input it was given.
def false_parser(name: nil)
  parser_name = name || "false_parser"
  Parser.new(parser_name) do |input|
    Result.nok(error: "false parser always fails", input:, parser_name:)
  end
end
169
+
170
# Builds a parser that succeeds for every input with a +nil+ ast; the
# result carries the input it was given, i.e. nothing is consumed.
def true_parser(name: nil)
  parser_name = name || "true_parser"
  Parser.new(parser_name) do |input|
    Result.ok(ast: nil, input:)
  end
end
174
+
175
+ # def make_parser(parser)
176
+ # case parser
177
+ # when String
178
+ # char_parser(parser)
179
+ # else
180
+ # parser
181
+ # end
182
+ # end
183
+
184
+ # def make_parsers(*parsers)
185
+ # parsers
186
+ # .flatten
187
+ # .map { make_parser it }
188
+ # end
189
+
190
+ private
191
+
192
# Builds the parser for exactly one character class.
#
# char_class:: a Symbol naming a POSIX bracket class (e.g. +:digit+) or a
#              String of literal bracket content (e.g. <tt>"_"</tt>)
# name::       optional parser name, defaults to
#              <tt>"char_class_parser(<class>)"</tt>
#
# The class is compiled with the same helper the multi-class builder uses,
# so a single String class is accepted too instead of being misread as a
# POSIX bracket name. For Symbols the regexp, name and messages are the
# same as before.
def _1_char_class_parser(char_class, name:)
  rgx = Regexp.compile("[#{_compile_char_class(char_class)}]")
  label = char_class.is_a?(Symbol) ? ":#{char_class}" : char_class.inspect
  name ||= "char_class_parser(#{label})"
  Parser.new(name) do |input|
    case input.content
    in []
      Result.nok(error: "unexpected end of input", parser_name: name, input:)
    in [h, *]
      if rgx.match?(h)
        Result.ok(ast: h, input: input.advance)
      else
        Result.nok(input:, parser_name: name, error: "#{h} does not match the char class: #{label}")
      end
    end
  end
end
208
+
209
# Builds the parser for a union of character classes.
#
# char_classes:: Symbols (POSIX bracket classes) and/or Strings (literal
#                bracket content); a character matching any of them is
#                accepted
# name::         optional parser name, defaults to
#                <tt>"char_class_parser(<classes.inspect>)"</tt>
#
# On success the ast is the consumed character and the input is advanced;
# both failure results carry the unchanged input and the parser name.
def _char_classes_parser(*char_classes, name:)
  rgx = Regexp.compile("[#{_compile_char_classes(char_classes)}]")
  name ||= "char_class_parser(#{char_classes.inspect})"
  Parser.new(name) do |input|
    case input.content
    in []
      Result.nok(error: "unexpected end of input", input:, parser_name: name)
    in [h, *]
      if rgx.match?(h)
        Result.ok(ast: h, input: input.advance)
      else
        # parser_name was missing here although every other failure result
        # of the base parsers reports it.
        Result.nok(input:, parser_name: name, error: "#{h} does not match the char class: :#{char_classes}")
      end
    end
  end
end
225
+
226
# Translates one character class into a regexp bracket fragment:
# a Symbol becomes a POSIX bracket name (<tt>[:name:]</tt>), a String
# becomes a bracket around its literal content. Any other value yields +nil+.
def _compile_char_class(char_class)
  case char_class
  in Symbol then "[:#{char_class}:]"
  in String then "[#{char_class}]"
  else nil
  end
end
234
+
235
# Concatenates the compiled fragment of every class and wraps the result
# in one pair of square brackets.
def _compile_char_classes(char_classes)
  fragments = char_classes.map { |char_class| _compile_char_class(char_class) }
  "[#{fragments.join}]"
end
241
+
242
# Turns the +set+ argument of +char_parser+ into an object answering
# +member?+: the grapheme clusters of the given string as a +Set+, or
# +TrueSet+ when no set was given.
def mk_set(set)
  return TrueSet unless set

  Set.new(set.grapheme_clusters)
end
249
+ end
250
+ end
251
+ end
252
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,193 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OOPeg
4
+ module Parsers
5
+ ##
6
+ #
7
+ # These are _convenience_ parsers that parse tokens which are commonly used
8
+ # in modern languages like +Elixir+, +Lua+, +Scheme+ or +Javascript+
9
+ #
10
+ #
11
+ # ===== +int_parser+
12
+ #
13
+ # As seen in the Quick Start Section it uses the +.and+ and the +.map+ combinators
14
+ # on the basic parsers.
15
+ #
16
+ # # example: int_parser (again)
17
+ #
18
+ # parse(int_parser, "-42").ast => -42
19
+ # parse(int_parser, "+3").ast => 3
20
+ # parse(int_parser, "73").ast => 73
21
+ # parse(int_parser, "x4") not! ok
22
+ #
23
+ # # example: int_parser (with base 16)
24
+ #
25
+ # parse(int_parser(hex: true), "cafee").ast => 831470
26
+ #
27
+ # Let us introduce the
28
+ # ===== +word_parser+
29
+ #
30
+ # also using the +.and+ combinator to make a hex_int_parser
31
+ #
32
+ # # example: hex_int_parser
33
+ #
34
+ # hex_int_parser = word_parser("0x").and(int_parser(hex: true))
35
+ #
36
+ # parse(hex_int_parser, "0x1f").ast => ["0x", 31]
37
+ #
38
+ # Now let us also introduce the +.map+ combinator to get rid of the superfluous output
39
+ #
40
+ # # example: A better hex_int_parser
41
+ #
42
+ # better_hex_int_parser = word_parser("0x")
43
+ # .and(int_parser(hex: true))
44
+ # .map(&:last)
45
+ #
46
+ # parse(better_hex_int_parser, "0x1f").ast => 31
47
+ #
48
+ # And this is also a perfect opportunity to introduce the +.or+ combinator by creating a
49
+ # more general int parser
50
+ #
51
+ # # example: A more general int parser
52
+ #
53
+ # general_int_parser =
54
+ # word_parser("0x")
55
+ # .and(int_parser(hex: true))
56
+ # .map(&:last)
57
+ # .or(int_parser)
58
+ #
59
+ # parse(general_int_parser, "12").ast => 12
60
+ # parse(general_int_parser, "0xff").ast => 255
61
+ # parse(general_int_parser, "-4").ast => -4
62
+ # parse(general_int_parser, "ff") not! ok
63
+ #
64
+ #
65
+ # ===== +id_parser+
66
+ #
67
+ # This is a highly configurable parser which allows to parse tokens which are
68
+ # typically names or identifiers.
69
+ #
70
+ # *N.B.* that it is not a combinator as none of its arguments is a parser.
71
+ #
72
+ # It parses an identifier based on a lead character class (for the first character) and
73
+ # an inner character class (for the rest of characters). Their default values are as follows
74
+ #
75
+ # <tt>lead_class: :alpha, inner_class: [:alnum, "_"]</tt>
76
+ #
77
+ # which parses like...
78
+ #
79
+ # # example: the default id_parser
80
+ #
81
+ # parse(id_parser, "_42") not! ok
82
+ # parse(id_parser, "42") not! ok
83
+ # parse(id_parser, "a_42").ast => "a_42"
84
+ #
85
+ # But if we want a more lispy style we could simply...
86
+ #
87
+ # # example: the lispy id_parser
88
+ #
89
+ # lispy_id_parser = id_parser(inner_class: [:alnum, "-"])
90
+ #
91
+ # parse(lispy_id_parser, "-42") not! ok
92
+ # parse(lispy_id_parser, "42") not! ok
93
+ # parse(lispy_id_parser, "_42") not! ok
94
+ # parse(lispy_id_parser, "a-42").ast => "a-42"
95
+ #
96
+ # By the same token we can allow leading +-+ characters
97
+ #
98
+ # # example: the weird lispy id_parser
99
+ #
100
+ # weird_lispy_id_parser = id_parser(lead_class: [:alpha, "-"], inner_class: [:alnum, "-"])
101
+ #
102
+ # parse(weird_lispy_id_parser, "42") not! ok
103
+ # parse(weird_lispy_id_parser, "_42") not! ok
104
+ # parse(weird_lispy_id_parser, "a-42").ast => "a-42"
105
+ # parse(weird_lispy_id_parser, "-42").ast => "-42"
106
+ #
107
+ # ===== +kwd_parser+, restricting ids to a set of words
108
+ #
109
+ # # example: only a defined set of identifiers
110
+ #
111
+ # cond_parser = kwd_parser(Set.new(%w[if else unless]))
112
+ #
113
+ # parse(cond_parser, "if").ast => "if"
114
+ # parse(cond_parser, "unless").ast => "unless"
115
+ # parse(cond_parser, "else").ast => "else"
116
+ # parse(cond_parser, "end") not! ok
117
+ #
118
+ # # example: kwd_parser can be customized like the id_parser
119
+ #
120
+ # parser = kwd_parser(Set.new(%w[1a 2b]), lead_class: :digit)
121
+ #
122
+ # parse(parser, "1a").ast => "1a"
123
+ # parse(parser, "2b").ast => "2b"
124
+ # parse(parser, "3c") not! ok
125
+ #
126
+ # ===== +set_parser+, a convenience parser (takes a splatted list instead of a set)
127
+ #
128
+ # # example: set_parser
129
+ #
130
+ # greek_parser = set_parser('alpha', 'beta', 'gamma')
131
+ #
132
+ # parse(greek_parser, 'alpha').ast => 'alpha'
133
+ # parse(greek_parser, 'beta').ast => 'beta'
134
+ # parse(greek_parser, 'gamma').ast => 'gamma'
135
+ #
136
+ # parse(greek_parser, 'delta') not! ok
137
+ #
138
+ module CommonParsers
139
+
140
# Builds an identifier parser: one character of +lead_class+ followed by
# any number of characters of +inner_class+. Each class argument may be a
# single class or an array of classes. The ast is the matched text as one
# String. +name+ is passed to the +and+ combinator.
def id_parser(name: nil, lead_class: :alpha, inner_class: [:alnum, "_"] )
  lead = char_class_parser(*Array(lead_class))
  rest = char_class_parser(*Array(inner_class)).many
  lead.and(rest, name:).map { |parts| parts.flatten.join }
end
145
+
146
# Parses an integer: an optional +++ or +-+ sign followed by at least one
# digit. The ast is the Integer value.
#
# With <tt>hex: true</tt> the digits are hexadecimal digits and the value
# is read in base 16, otherwise decimal digits in base 10.
#
# **N.B.** that leading zeroes are parsed (and therefore ignored); they do
# not switch the parser to hexadecimal or octal.
def int_parser(name: nil, hex: false)
  name ||= "int_parser"
  digit_class, radix = hex ? [:xdigit, 16] : [:digit, 10]
  sign = char_parser("+-").maybe
  digits = char_class_parser(digit_class).many(min: 1)
  sign
    .and(digits)
    .map_or_rename(name:) { |parts| parts.join.to_i(radix) }
end
163
+
164
# Builds an identifier parser (same +lead_class+ / +inner_class+ options as
# +id_parser+) that is additionally required to produce a member of +set+.
def kwd_parser(set, name: nil, lead_class: :alpha, inner_class: [:alnum, "_"])
  identifier = id_parser(name:, lead_class:, inner_class:)
  identifier.satisfy { |word| set.member?(word) }
end
168
+
169
# Convenience form of +kwd_parser+: the allowed words are given as a
# splatted argument list and collected into a +Set+ here.
def set_parser(*elements, name: nil, lead_class: :alpha, inner_class: [:alnum, "_"])
  allowed = Set.new(elements)
  kwd_parser(allowed, name:, lead_class:, inner_class:)
end
172
+
173
# Builds a parser for the literal +word+: one single-character parser per
# grapheme cluster, chained with +and+; the ast is the joined text.
#
# An empty +word+ has no first grapheme and raises NoMatchingPatternError.
def word_parser(word, name: nil)
  name ||= "word_parser(#{word})"
  case word.grapheme_clusters
  in [first, *rest]
    lead = char_parser(first)
    followers = rest.map { |grapheme| char_parser(grapheme) }
    lead.and(*followers, name:).map(&:join)
  end
end
180
+
181
# Builds a parser for a run of whitespace (the +:space+ character class).
#
# positional_name:: the parser name given positionally (the original calling
#                   convention, still supported)
# name::            the parser name as a keyword, matching the other parser
#                   factories; takes precedence over the positional form
# min::             minimum number of whitespace characters, passed to +many+
# ignore::          when truthy (the default) the +ignore+ combinator is
#                   applied to the parser before it is returned
#
# The name defaults to "ws_parser" when neither form is given.
def ws_parser(positional_name = nil, name: nil, min: 1, ignore: true)
  name ||= positional_name || "ws_parser"
  parser = char_class_parser(:space).many(min:, name:)
  ignore ? parser.ignore : parser
end
190
+ end
191
+ end
192
+ end
193
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'advanced'
4
+ module OOPeg
5
+ module Parsers
6
+ ##
7
+ #
8
+ # A highly configurable parser for s-expressions
9
class LispyParser
  # The parser factories are mixed in so that the +ws_parser+ default of
  # +sep_parser+ resolves on the instance itself instead of depending on
  # the host program having included the parsers globally.
  include Parsers

  # prefix::     opening delimiter characters
  # suffix::     closing delimiter characters
  # sep_parser:: parser used between elements, whitespace by default
  #
  # NOTE(review): the class has no behavior yet; the arguments are only
  # stored. Presumably prefix and suffix pair up position by position -
  # confirm once the parser itself is implemented.
  def initialize(prefix: "([", suffix: "])", sep_parser: ws_parser)
    @prefix = prefix
    @suffix = suffix
    @sep_parser = sep_parser
  end
end
16
+ end
17
+ end
18
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'true_set'
4
+ require_relative '../parser'
5
+ module OOPeg
6
+ module Parsers
7
+ module PseudoParsers # Empty placeholder namespace: no methods are defined in it here.
8
+
9
+ end
10
+ end
11
+ end
12
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OOPeg
4
+ module Parsers
5
# Stand-in for a set that contains everything: it answers the two messages
# +char_parser+ sends to its set (+member?+ and +to_a+) and is used when no
# explicit character set is given.
module TrueSet
  extend self

  # Every candidate is a member.
  def member?(_)
    true
  end

  # A one-element list holding the module's own label, so that joining it
  # produces a readable parser name.
  def to_a
    %w[TrueSet]
  end
end
9
+ end
10
+ end
11
+ # SPDX-License-Identifier: AGPL-3.0-or-later
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "parsers/base_parsers"
4
+ require_relative "parsers/common_parsers"
5
+ require_relative "parser/combinators/lazy"
6
+ module OOPeg
7
+ module Parsers # Aggregates the parser factory mixins listed below into one includable module.
8
+ include BaseParsers
9
+ include CommonParsers
10
+ include Parser::Combinators::Lazy # Keep the include order: modules included later are searched first on method lookup.
11
+ end
12
+ end
13
+ # SPDX-License-Identifier: AGPL-3.0-or-later