pestle 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. checksums.yaml +7 -0
  2. checksums.yaml.gz.sig +0 -0
  3. data/.rubocop.yml +59 -0
  4. data/.ruby-version +1 -0
  5. data/CHANGELOG.md +3 -0
  6. data/LICENSE.txt +21 -0
  7. data/LICENSE_PEST.txt +23 -0
  8. data/README.md +124 -0
  9. data/Rakefile +23 -0
  10. data/Steepfile +19 -0
  11. data/benchmarks/jsonpath_ips.rb +33 -0
  12. data/examples/calculator_pratt.rb +157 -0
  13. data/examples/calculator_prec_climber.rb +225 -0
  14. data/examples/calculator_stack_vm.rb +291 -0
  15. data/examples/csv.rb +73 -0
  16. data/examples/ini.rb +90 -0
  17. data/examples/json_example.rb +141 -0
  18. data/examples/jsonpath/README.md +3 -0
  19. data/examples/jsonpath/jsonpath.pest +182 -0
  20. data/examples/jsonpath/lib/jsonpath/ast.rb +362 -0
  21. data/examples/jsonpath/lib/jsonpath/function_extensions.rb +201 -0
  22. data/examples/jsonpath/lib/jsonpath/node.rb +20 -0
  23. data/examples/jsonpath/lib/jsonpath/query.rb +25 -0
  24. data/examples/jsonpath/lib/jsonpath.rb +453 -0
  25. data/lib/pestle/errors.rb +98 -0
  26. data/lib/pestle/grammar/builtin_rules/ascii.rb +38 -0
  27. data/lib/pestle/grammar/builtin_rules/special.rb +63 -0
  28. data/lib/pestle/grammar/builtin_rules/unicode.rb +291 -0
  29. data/lib/pestle/grammar/errors.rb +62 -0
  30. data/lib/pestle/grammar/expression.rb +90 -0
  31. data/lib/pestle/grammar/expressions/choice.rb +36 -0
  32. data/lib/pestle/grammar/expressions/group.rb +27 -0
  33. data/lib/pestle/grammar/expressions/identifier.rb +26 -0
  34. data/lib/pestle/grammar/expressions/postfix.rb +272 -0
  35. data/lib/pestle/grammar/expressions/prefix.rb +51 -0
  36. data/lib/pestle/grammar/expressions/range.rb +26 -0
  37. data/lib/pestle/grammar/expressions/sequence.rb +38 -0
  38. data/lib/pestle/grammar/expressions/stack.rb +192 -0
  39. data/lib/pestle/grammar/expressions/string.rb +46 -0
  40. data/lib/pestle/grammar/lexer.rb +464 -0
  41. data/lib/pestle/grammar/parser.rb +340 -0
  42. data/lib/pestle/grammar/rule.rb +98 -0
  43. data/lib/pestle/pair.rb +325 -0
  44. data/lib/pestle/parser.rb +48 -0
  45. data/lib/pestle/pratt.rb +74 -0
  46. data/lib/pestle/state.rb +220 -0
  47. data/lib/pestle/version.rb +5 -0
  48. data/lib/pestle.rb +24 -0
  49. data/sig/errors.rbs +22 -0
  50. data/sig/grammar/ascii.rbs +9 -0
  51. data/sig/grammar/choice.rbs +14 -0
  52. data/sig/grammar/errors.rbs +22 -0
  53. data/sig/grammar/expression.rbs +39 -0
  54. data/sig/grammar/group.rbs +14 -0
  55. data/sig/grammar/identifier.rbs +11 -0
  56. data/sig/grammar/lexer.rbs +85 -0
  57. data/sig/grammar/parser.rbs +57 -0
  58. data/sig/grammar/postfix.rbs +112 -0
  59. data/sig/grammar/prefix.rbs +27 -0
  60. data/sig/grammar/range.rbs +20 -0
  61. data/sig/grammar/rule.rbs +40 -0
  62. data/sig/grammar/sequence.rbs +14 -0
  63. data/sig/grammar/special.rbs +39 -0
  64. data/sig/grammar/stack.rbs +57 -0
  65. data/sig/grammar/string.rbs +27 -0
  66. data/sig/grammar/unicode.rbs +15 -0
  67. data/sig/pair.rbs +168 -0
  68. data/sig/parser.rbs +16 -0
  69. data/sig/pestle.rbs +5 -0
  70. data/sig/pratt.rbs +27 -0
  71. data/sig/state.rbs +95 -0
  72. data/sig/stdlib/strscan.rbs +3 -0
  73. data.tar.gz.sig +0 -0
  74. metadata +141 -0
  75. metadata.gz.sig +0 -0
@@ -0,0 +1,340 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "errors"
4
+ require_relative "expression"
5
+ require_relative "expressions/choice"
6
+ require_relative "expressions/group"
7
+ require_relative "expressions/identifier"
8
+ require_relative "expressions/postfix"
9
+ require_relative "expressions/prefix"
10
+ require_relative "expressions/range"
11
+ require_relative "expressions/sequence"
12
+ require_relative "expressions/stack"
13
+ require_relative "expressions/string"
14
+ require_relative "rule"
15
+
16
+ # Pest grammar parsing and evaluation.
17
+ module Pestle::Grammar
18
+ # Pest grammar parser.
19
+ class Parser
20
# Binding powers used by the infix loop in #parse_expression.
# An operator with a larger number binds more tightly.
PREC_LOWEST = 1
PREC_CHOICE = 2
PREC_SEQUENCE = 3
PREC_PREFIX = 4

# Binding power of each infix operator, keyed by token kind.
PRECEDENCES = {
  token_choice_op: PREC_CHOICE,
  token_sequence_op: PREC_SEQUENCE
}.freeze

# Token kinds that continue an expression as an infix operator.
# Frozen, like PRECEDENCES, so the shared constant can not be mutated by accident.
INFIX_OPERATORS = Set.new(%i[token_choice_op token_sequence_op]).freeze
31
+
32
# Set up a parser over an already lexed token list.
#
# source - the grammar text, kept so that errors can refer back to it.
# tokens - the list of tokens; its last element is remembered as the EOF token.
#
# Raises RuntimeError if _tokens_ has no last element.
def initialize(source, tokens)
  @source = source
  @tokens = tokens
  @pos = 0

  last_token = tokens.last
  raise unless last_token

  @eof = last_token
end
38
+
39
# Look at the token under the cursor without moving past it.
# Once the cursor has run off the end of the token list, the EOF token is returned.
def current
  token = @tokens[@pos]
  token || @eof
end
42
+
43
# Return the token under the cursor and step past it.
# When there is nothing left to consume the cursor stays put and the EOF token is returned.
def next
  consumed = @tokens[@pos]
  return @eof if consumed.nil?

  @pos += 1
  consumed || @eof
end
49
+
50
# Look _offset_ tokens beyond the cursor (one by default) without consuming anything.
# The EOF token is returned when that position holds no token.
def peek(offset = 1)
  ahead = @tokens[@pos + offset]
  ahead || @eof
end
52
+
53
# Consume the current token, insisting that it is of the given _kind_.
#
# kind    - the token kind that must come next.
# message - optional error text; defaults to "unexpected " followed by the kind found.
#
# Returns the consumed token.
# Raises PestGrammarError if the token is of another kind. The offending token has
# already been consumed by then.
def eat(kind, message = nil)
  token = self.next
  return token if token.first == kind

  raise PestGrammarError.new(message || "unexpected #{token.first}", @source, token)
end
63
+
64
# Parse the whole token list.
#
# Leading grammar doc tokens are collected first, then the rules are parsed.
#
# Returns a two element array: the rules as returned by #parse_rules, and the grammar
# doc lines joined with newlines.
def parse
  doc_lines = [] # : Array[String]

  while current.first == :token_grammar_doc
    line = self.next[1]
    raise unless line

    doc_lines << line
  end

  [parse_rules, doc_lines.join("\n")]
end
69
+
70
# Parse rule definitions until the EOF token is reached.
#
# Each rule is: any number of rule doc tokens, an identifier, an assignment operator,
# an optional modifier and an expression wrapped in braces.
#
# Returns a Hash mapping rule name to Rule. A later definition with the same name
# replaces an earlier one.
# Raises PestGrammarError (via #eat) when an expected token is missing.
def parse_rules
  rules = {} # : Hash[String, Rule]

  until current.first == :token_eof
    doc_lines = [] # : Array[String]
    while current.first == :token_rule_doc
      line = self.next[1]
      raise unless line

      doc_lines << line
    end

    name = eat(:token_ident)[1] || raise
    eat(:token_assign_op)
    modifier = parse_modifier
    eat(:token_l_brace)
    body = parse_expression(PREC_LOWEST)
    eat(:token_r_brace)

    rules[name] = Rule.new(name, body, modifier: modifier, doc: doc_lines.join("\n"))
  end

  rules
end
90
+
91
# Parse an optional rule modifier at the current position.
#
# Returns the Rule modifier flag matching the current token (Rule::SILENT,
# Rule::ATOMIC, Rule::COMPOUND or Rule::NONATOMIC) after consuming that token,
# or 0 without consuming anything when the current token is not a modifier.
def parse_modifier
  # Use Rule's named flags rather than repeating their bit patterns here.
  flag = case current.first
         when :token_mod_silent then Rule::SILENT
         when :token_mod_atomic then Rule::ATOMIC
         when :token_mod_compound then Rule::COMPOUND
         when :token_mod_non_atomic then Rule::NONATOMIC
         end

  return 0 if flag.nil?

  @pos += 1
  flag
end
109
+
110
# Parse an expression, extending it with infix operators for as long as they bind at
# least as tightly as _precedence_.
#
# An expression is made of an optional leading choice operator (skipped), an optional
# tag followed by an assignment operator, one term, whatever #parse_postfix_expression
# accepts after it, and then any number of infix continuations.
#
# precedence - the minimum binding power an infix operator needs to extend the
#              expression being parsed.
#
# Returns the parsed Expression.
# Raises PestGrammarError if the current token can not start a term, or when a token
# required inside a term is missing.
def parse_expression(precedence)
  # Skip leading choice operator
  self.next if current.first == :token_choice_op

  tag = if current.first == :token_tag
          tag_tok = self.next
          eat(:token_assign_op)
          tag_tok[1]
        end

  token = current

  # @type var left: Expression
  left = case token.first
         when :token_string
           StringLiteral.new(self.next[1] || raise, tag: tag)
         when :token_ci_string
           InsensitiveString.new(self.next[1] || raise, tag: tag)
         when :token_string_esc
           StringLiteral.new(unescape(self.next[1] || raise, token),
                             tag: tag)
         when :token_ci_string_esc
           InsensitiveString.new(unescape(self.next[1] || raise, token),
                                 tag: tag)
         when :token_l_paren
           @pos += 1
           expr = Group.new(parse_expression(PREC_LOWEST), tag: tag)
           eat(:token_r_paren)
           expr
         when :token_ident
           Identifier.new(self.next[1] || raise, tag: tag)
         when :token_push_literal
           @pos += 1
           PushLiteral.new(eat(:token_string)[1] || raise, tag: tag)
         when :token_push_expr
           @pos += 1
           eat(:token_l_paren)
           expr = Push.new(parse_expression(PREC_LOWEST), tag: tag)
           eat(:token_r_paren)
           expr
         when :token_peek
           @pos += 1
           parse_peek_expression(tag)
         when :token_peek_all
           @pos += 1
           PeekAll.new(tag: tag)
         when :token_pop
           @pos += 1
           Pop.new(tag: tag)
         when :token_drop
           @pos += 1
           Drop.new(tag: tag)
         when :token_pop_all
           @pos += 1
           PopAll.new(tag: tag)
         when :token_char
           start = unescape(self.next[1] || raise, token)
           eat(:token_range_op)
           # Unescape the upper bound against its own token, so that a bad escape there
           # is reported at the upper bound rather than at the lower bound.
           stop_token = eat(:token_char)
           stop = unescape(stop_token[1] || raise, stop_token)
           Range.new(start, stop, tag: tag)
         when :token_pos_pred
           @pos += 1
           PositivePredicate.new(parse_expression(PREC_PREFIX), tag: tag)
         when :token_neg_pred
           @pos += 1
           NegativePredicate.new(parse_expression(PREC_PREFIX), tag: tag)
         else
           raise PestGrammarError.new("unexpected token #{token.first}", @source, token)
         end

  left = parse_postfix_expression(left)

  loop do
    kind = current.first
    break unless INFIX_OPERATORS.member?(kind)
    # Leave operators that bind more loosely than requested to an enclosing call.
    break if (PRECEDENCES[kind] || PREC_LOWEST) < precedence

    left = parse_infix_expression(left)
  end

  left
end
192
+
193
# Parse the operator at the current position and the expression to its right, and
# combine them with the already parsed _left_ operand.
#
# When the right-hand side is itself a node of the same type, its children are
# spliced in, so a chain of one operator yields a single flat node.
#
# Returns a Choice or a Sequence.
# Raises PestGrammarError if the consumed token is neither a choice nor a sequence
# operator. The right-hand side has been parsed by then.
def parse_infix_expression(left)
  operator = self.next
  kind = operator.first
  right = parse_expression(PRECEDENCES[kind] || PREC_LOWEST)

  node_class = if kind == :token_choice_op
                 Choice
               elsif kind == :token_sequence_op
                 Sequence
               end

  raise PestGrammarError.new("unexpected operator #{kind}", @source, operator) if node_class.nil?

  operands = right.is_a?(node_class) ? right.children : [right]
  node_class.new(left, *operands)
end
216
+
217
# Wrap _expr_ in the postfix operators that follow it.
#
# Operators are applied left to right, each wrapping the result of the previous one,
# until the current token is no longer a postfix operator. Accepting a run of
# operators rather than just one follows pest's grammar for terms.
#
# Returns _expr_ unchanged when no postfix operator follows, otherwise the outermost
# Optional, Repeat, RepeatOnce or repeat-range expression.
# Raises PestGrammarError (via #parse_repeat_expression) for a malformed repeat range.
def parse_postfix_expression(expr)
  loop do
    case current.first
    when :token_optional_op
      @pos += 1
      expr = Optional.new(expr)
    when :token_repeat_op
      @pos += 1
      expr = Repeat.new(expr)
    when :token_repeat_once_op
      @pos += 1
      expr = RepeatOnce.new(expr)
    when :token_l_brace
      @pos += 1
      expr = parse_repeat_expression(expr)
    else
      return expr
    end
  end
end
238
+
239
# Parse the inside of a brace-delimited repeat range and its closing brace. The
# opening brace has already been consumed by the caller.
#
# Four shapes are accepted:
#   number          -> RepeatExact
#   number ,        -> RepeatMin
#   number , number -> RepeatMinMax
#   , number        -> RepeatMax
#
# Returns the repeat expression wrapping _expr_.
# Raises PestGrammarError if the range starts with neither a number nor a comma, or
# (via #eat) when a later token is not the one expected.
def parse_repeat_expression(expr)
  first = self.next

  case first.first
  when :token_number
    lower = (first[1] || raise).to_i

    if current.first == :token_r_brace
      @pos += 1
      return RepeatExact.new(expr, lower)
    end

    eat(:token_comma)

    if current.first == :token_r_brace
      @pos += 1
      return RepeatMin.new(expr, lower)
    end

    upper = (eat(:token_number)[1] || raise).to_i
    eat(:token_r_brace)
    RepeatMinMax.new(expr, lower, upper)
  when :token_comma
    upper = (eat(:token_number)[1] || raise).to_i
    eat(:token_r_brace)
    RepeatMax.new(expr, upper)
  else
    raise PestGrammarError.new("expected a number or a comma", @source, first)
  end
end
270
+
271
# Parse what follows a peek token, which has already been consumed by the caller.
#
# Without an opening bracket this is a plain Peek. Otherwise a bracketed slice is
# read: an optional integer, a range operator, an optional integer and the closing
# bracket. A missing bound is passed on as nil.
#
# tag - the tag to attach to the resulting expression, or nil.
#
# Returns a Peek or a PeekSlice.
# Raises PestGrammarError (via #eat) if the range operator or closing bracket is missing.
def parse_peek_expression(tag)
  unless current.first == :token_l_bracket
    return Peek.new(tag: tag)
  end

  # Discard the opening bracket.
  self.next

  lower = nil
  lower = (self.next[1] || raise).to_i if current.first == :token_integer
  eat(:token_range_op)

  upper = nil
  upper = (self.next[1] || raise).to_i if current.first == :token_integer
  eat(:token_r_bracket)

  PeekSlice.new(lower, upper, tag: tag)
end
281
+
282
RE_SLASH_X = /\\x([0-9a-fA-F]{2})/
RE_SLASH_U = /\\u\{([0-9a-fA-F]{2,6})\}/

# Single character escapes, keyed by the character that follows the backslash.
SIMPLE_ESCAPES = {
  "\"" => "\"",
  "'" => "'",
  "\\" => "\\",
  "/" => "/",
  "b" => "\x08",
  "f" => "\x0c",
  "n" => "\n",
  "r" => "\r",
  "t" => "\t"
}.freeze

# Replace the escape sequences in _value_ with the characters they stand for.
#
# Recognised are two digit hex escapes, braced unicode escapes of two to six hex
# digits, and the single character escapes listed in SIMPLE_ESCAPES.
#
# value - the text to unescape.
# token - the token _value_ came from; only used when reporting an error.
#
# Returns the unescaped String.
# Raises PestGrammarError for a backslash at the very end of _value_, for an unknown
# escape character, and for a hex number that is not a valid code point.
def unescape(value, token)
  unescaped = [] # : Array[String]
  scanner = StringScanner.new(value)

  until scanner.eos?
    if scanner.scan(RE_SLASH_X) || scanner.scan(RE_SLASH_U)
      hex = scanner.captures&.first || raise

      begin
        unescaped << hex.to_i(16).chr(Encoding::UTF_8)
      rescue RangeError
        # Integer#chr rejects surrogates and values beyond U+10FFFF; report that as
        # a grammar error instead of leaking a RangeError to the caller.
        raise PestGrammarError.new("invalid code point in escape sequence", @source, token)
      end

      next
    end

    ch = scanner.getch

    # Not expected before eos?; presumably kept so a type checker sees a String below.
    break if ch.nil?

    unless ch == "\\"
      unescaped << ch
      next
    end

    escaped = scanner.getch
    raise PestGrammarError.new("incomplete escape sequence", @source, token) if escaped.nil?

    replacement = SIMPLE_ESCAPES[escaped]
    raise PestGrammarError.new("unknown escape sequence", @source, token) if replacement.nil?

    unescaped << replacement
  end

  unescaped.join
end
339
+ end
340
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Pestle::Grammar
4
+ # A Pest grammar rule.
5
+ class Rule < Expression
6
+ attr_reader :name, :modifier, :doc
7
+
8
# Modifier flags. Each occupies its own bit, so flags can be combined with `|`
# and tested with anybits? / nobits?.
SILENT = 0b0001
ATOMIC = 0b0010
COMPOUND = 0b0100
NONATOMIC = 0b1000

# SILENT combined with each of the other flags.
SILENT_ATOMIC = SILENT | ATOMIC
SILENT_COMPOUND = SILENT | COMPOUND
SILENT_NONATOMIC = SILENT | NONATOMIC
16
+
17
# Create a rule.
#
# name       - the rule's name.
# expression - the expression forming the rule's body.
# modifier   - modifier flags (see SILENT, ATOMIC, COMPOUND and NONATOMIC); 0 for none.
# doc        - documentation text for the rule, or nil.
#
# Rules named COMMENT or WHITESPACE always get the ATOMIC flag added to whatever
# _modifier_ was given.
def initialize(name, expression, modifier: 0, doc: nil)
  super(tag: nil)

  always_atomic = %w[COMMENT WHITESPACE].include?(name)

  @name = name
  @expression = expression
  @modifier = always_atomic ? modifier | ATOMIC : modifier
  @doc = doc
end
24
+
25
# Render the rule in grammar syntax: name, assignment, modifier character and the
# body in braces.
#
# The modifier character is "_" for SILENT, "@" for ATOMIC, "$" for COMPOUND and
# "!" for NONATOMIC. Any other combination of flags is rendered without a modifier.
def to_s
  flags = @modifier

  # #initialize adds ATOMIC to COMMENT and WHITESPACE on top of the modifier that was
  # given. Strip that implied bit again when another flag is present, so that a
  # silent WHITESPACE rule is rendered with "_" instead of losing its modifier.
  flags &= ~ATOMIC if %w[COMMENT WHITESPACE].include?(@name) && flags != ATOMIC

  modifier = case flags
             when SILENT then "_"
             when ATOMIC then "@"
             when COMPOUND then "$"
             when NONATOMIC then "!"
             else ""
             end

  "#{@name} = #{modifier}{ #{@expression} }"
end
41
+
42
# Try to match this rule at the scanner's current position.
#
# state - the parse state; its scanner, rule stack, tag stack, rules and text are used.
# pairs - the array that receives the result of a successful match.
#
# The body is evaluated inside state.atomic for ATOMIC or COMPOUND rules, inside
# state.nonatomic for NONATOMIC rules, and directly otherwise. A non-silent rule is
# on state.rule_stack for the duration. One entry is popped from state.tags whether
# or not the body matched.
#
# On success a silent rule appends the pairs of its body to _pairs_; any other rule
# appends a single Pestle::Pair covering the matched span. An ATOMIC rule drops the
# pairs of its body, unless the body is a rule (or an identifier naming a rule) that
# is NONATOMIC or COMPOUND.
#
# Returns true if the rule matched, false otherwise.
def parse(state, pairs)
  origin = state.scanner.pos
  silent = @modifier.anybits?(SILENT)
  state.rule_stack << @name unless silent

  inner = [] # : Array[Pestle::Pair]
  success = false

  if @modifier.anybits?(ATOMIC | COMPOUND)
    state.atomic { success = @expression.parse(state, inner) }
  elsif @modifier.anybits?(NONATOMIC)
    state.nonatomic { success = @expression.parse(state, inner) }
  else
    success = @expression.parse(state, inner)
  end

  state.rule_stack.pop unless silent
  tag = state.tags.pop

  if !success
    false
  elsif silent
    pairs.concat(inner)
    true
  else
    if @modifier.anybits?(ATOMIC)
      # @type var target: Rule?
      # steep:ignore:start
      target = case @expression
               when Rule then @expression
               when Identifier then state.rules[@expression.value]
               end
      # steep:ignore:end

      keeps_children = !target.nil? && target.modifier.anybits?(NONATOMIC | COMPOUND)

      # Atomic rule silences children.
      inner = [] unless keeps_children # : Array[Pestle::Pair]
    end

    pairs << Pestle::Pair.new(state.text, origin, state.scanner.pos, @name, inner, tag: tag)
    true
  end
end
92
+
93
# The rule's body, wrapped in a one element array.
def children
  [@expression]
end
94
+ end
95
+
96
# A Rule subclass that adds no behaviour of its own here.
# NOTE(review): presumably the base class for the rules under builtin_rules/ - confirm.
class BuiltInRule < Rule; end
98
+ end