plurimath-parslet 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +284 -0
- data/LICENSE +23 -0
- data/README.adoc +454 -0
- data/Rakefile +71 -0
- data/lib/parslet/accelerator/application.rb +62 -0
- data/lib/parslet/accelerator/engine.rb +112 -0
- data/lib/parslet/accelerator.rb +162 -0
- data/lib/parslet/atoms/alternative.rb +53 -0
- data/lib/parslet/atoms/base.rb +157 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/capture.rb +38 -0
- data/lib/parslet/atoms/context.rb +103 -0
- data/lib/parslet/atoms/dsl.rb +112 -0
- data/lib/parslet/atoms/dynamic.rb +32 -0
- data/lib/parslet/atoms/entity.rb +45 -0
- data/lib/parslet/atoms/ignored.rb +26 -0
- data/lib/parslet/atoms/infix.rb +115 -0
- data/lib/parslet/atoms/lookahead.rb +52 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +41 -0
- data/lib/parslet/atoms/repetition.rb +87 -0
- data/lib/parslet/atoms/scope.rb +26 -0
- data/lib/parslet/atoms/sequence.rb +48 -0
- data/lib/parslet/atoms/str.rb +42 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/atoms.rb +34 -0
- data/lib/parslet/cause.rb +101 -0
- data/lib/parslet/context.rb +21 -0
- data/lib/parslet/convenience.rb +33 -0
- data/lib/parslet/error_reporter/contextual.rb +120 -0
- data/lib/parslet/error_reporter/deepest.rb +100 -0
- data/lib/parslet/error_reporter/tree.rb +63 -0
- data/lib/parslet/error_reporter.rb +8 -0
- data/lib/parslet/export.rb +163 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/graphviz.rb +97 -0
- data/lib/parslet/parser.rb +68 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/pattern.rb +113 -0
- data/lib/parslet/position.rb +21 -0
- data/lib/parslet/rig/rspec.rb +52 -0
- data/lib/parslet/scope.rb +42 -0
- data/lib/parslet/slice.rb +105 -0
- data/lib/parslet/source/line_cache.rb +99 -0
- data/lib/parslet/source.rb +96 -0
- data/lib/parslet/transform.rb +265 -0
- data/lib/parslet/version.rb +5 -0
- data/lib/parslet.rb +314 -0
- data/plurimath-parslet.gemspec +42 -0
- data/spec/acceptance/infix_parser_spec.rb +145 -0
- data/spec/acceptance/mixing_parsers_spec.rb +74 -0
- data/spec/acceptance/regression_spec.rb +329 -0
- data/spec/acceptance/repetition_and_maybe_spec.rb +44 -0
- data/spec/acceptance/unconsumed_input_spec.rb +21 -0
- data/spec/examples/boolean_algebra_spec.rb +257 -0
- data/spec/examples/calc_spec.rb +278 -0
- data/spec/examples/capture_spec.rb +137 -0
- data/spec/examples/comments_spec.rb +186 -0
- data/spec/examples/deepest_errors_spec.rb +420 -0
- data/spec/examples/documentation_spec.rb +205 -0
- data/spec/examples/email_parser_spec.rb +275 -0
- data/spec/examples/empty_spec.rb +37 -0
- data/spec/examples/erb_spec.rb +482 -0
- data/spec/examples/ip_address_spec.rb +153 -0
- data/spec/examples/json_spec.rb +413 -0
- data/spec/examples/local_spec.rb +302 -0
- data/spec/examples/mathn_spec.rb +151 -0
- data/spec/examples/minilisp_spec.rb +492 -0
- data/spec/examples/modularity_spec.rb +340 -0
- data/spec/examples/nested_errors_spec.rb +322 -0
- data/spec/examples/optimized_erb_spec.rb +299 -0
- data/spec/examples/parens_spec.rb +239 -0
- data/spec/examples/prec_calc_spec.rb +525 -0
- data/spec/examples/readme_spec.rb +228 -0
- data/spec/examples/scopes_spec.rb +187 -0
- data/spec/examples/seasons_spec.rb +196 -0
- data/spec/examples/sentence_spec.rb +119 -0
- data/spec/examples/simple_xml_spec.rb +250 -0
- data/spec/examples/string_parser_spec.rb +407 -0
- data/spec/fixtures/examples/boolean_algebra.rb +62 -0
- data/spec/fixtures/examples/calc.rb +86 -0
- data/spec/fixtures/examples/capture.rb +36 -0
- data/spec/fixtures/examples/comments.rb +22 -0
- data/spec/fixtures/examples/deepest_errors.rb +99 -0
- data/spec/fixtures/examples/documentation.rb +32 -0
- data/spec/fixtures/examples/email_parser.rb +42 -0
- data/spec/fixtures/examples/empty.rb +10 -0
- data/spec/fixtures/examples/erb.rb +39 -0
- data/spec/fixtures/examples/ip_address.rb +103 -0
- data/spec/fixtures/examples/json.rb +107 -0
- data/spec/fixtures/examples/local.rb +60 -0
- data/spec/fixtures/examples/mathn.rb +47 -0
- data/spec/fixtures/examples/minilisp.rb +75 -0
- data/spec/fixtures/examples/modularity.rb +60 -0
- data/spec/fixtures/examples/nested_errors.rb +95 -0
- data/spec/fixtures/examples/optimized_erb.rb +105 -0
- data/spec/fixtures/examples/parens.rb +25 -0
- data/spec/fixtures/examples/prec_calc.rb +71 -0
- data/spec/fixtures/examples/readme.rb +59 -0
- data/spec/fixtures/examples/scopes.rb +43 -0
- data/spec/fixtures/examples/seasons.rb +40 -0
- data/spec/fixtures/examples/sentence.rb +18 -0
- data/spec/fixtures/examples/simple_xml.rb +51 -0
- data/spec/fixtures/examples/string_parser.rb +77 -0
- data/spec/parslet/atom_results_spec.rb +39 -0
- data/spec/parslet/atoms/alternative_spec.rb +26 -0
- data/spec/parslet/atoms/base_spec.rb +127 -0
- data/spec/parslet/atoms/capture_spec.rb +21 -0
- data/spec/parslet/atoms/combinations_spec.rb +5 -0
- data/spec/parslet/atoms/dsl_spec.rb +7 -0
- data/spec/parslet/atoms/entity_spec.rb +77 -0
- data/spec/parslet/atoms/ignored_spec.rb +15 -0
- data/spec/parslet/atoms/infix_spec.rb +5 -0
- data/spec/parslet/atoms/lookahead_spec.rb +22 -0
- data/spec/parslet/atoms/named_spec.rb +4 -0
- data/spec/parslet/atoms/re_spec.rb +14 -0
- data/spec/parslet/atoms/repetition_spec.rb +24 -0
- data/spec/parslet/atoms/scope_spec.rb +26 -0
- data/spec/parslet/atoms/sequence_spec.rb +28 -0
- data/spec/parslet/atoms/str_spec.rb +15 -0
- data/spec/parslet/atoms/visitor_spec.rb +101 -0
- data/spec/parslet/atoms_spec.rb +488 -0
- data/spec/parslet/convenience_spec.rb +54 -0
- data/spec/parslet/error_reporter/contextual_spec.rb +118 -0
- data/spec/parslet/error_reporter/deepest_spec.rb +82 -0
- data/spec/parslet/error_reporter/tree_spec.rb +7 -0
- data/spec/parslet/export_spec.rb +40 -0
- data/spec/parslet/expression/treetop_spec.rb +74 -0
- data/spec/parslet/minilisp.citrus +29 -0
- data/spec/parslet/minilisp.tt +29 -0
- data/spec/parslet/parser_spec.rb +36 -0
- data/spec/parslet/parslet_spec.rb +38 -0
- data/spec/parslet/pattern_spec.rb +272 -0
- data/spec/parslet/position_spec.rb +14 -0
- data/spec/parslet/rig/rspec_spec.rb +54 -0
- data/spec/parslet/scope_spec.rb +45 -0
- data/spec/parslet/slice_spec.rb +186 -0
- data/spec/parslet/source/line_cache_spec.rb +74 -0
- data/spec/parslet/source_spec.rb +210 -0
- data/spec/parslet/transform/context_spec.rb +56 -0
- data/spec/parslet/transform_spec.rb +183 -0
- data/spec/spec_helper.rb +74 -0
- data/spec/support/opal.rb +8 -0
- data/spec/support/opal.rb.erb +14 -0
- data/spec/support/parslet_matchers.rb +96 -0
- metadata +240 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
# This example demonstrates how to do deepest error reporting, as invented
|
2
|
+
# by John Mettraux (issue #64).
|
3
|
+
|
4
|
+
require 'parslet'
|
5
|
+
require 'parslet/convenience'
|
6
|
+
|
7
|
+
# Prints +str+ to standard output with a column ruler on top and a two-digit,
# 1-based line number in front of every line.
#
# @param str [String] the text to display
# @return [Array<String>] the lines of +str+ (as produced by String#lines)
def prettify(str)
  # Column ruler: dots mark columns 5 and 15, the numbers mark 10 and 20.
  ruler = [' ' * 7, '.', ' ' * 4, '10', ' ' * 3, '.', ' ' * 4, '20'].join
  puts ruler

  str.lines.each.with_index(1) do |text, number|
    puts format('%02d %s', number, text.chomp)
  end
end
|
15
|
+
|
16
|
+
# Grammar of the small block-structured language used by the deepest error
# reporting example. Parsing starts at the :radix rule.
class DeepestErrorsParser < Parslet::Parser
  root :radix

  # --- commons

  # One or more spaces or tabs.
  rule(:space) do
    match('[ \t]').repeat(1)
  end

  rule(:space?) do
    space.maybe
  end

  # A single carriage return or line feed character.
  rule(:newline) do
    match('[\r\n]')
  end

  # A '#' followed by everything up to (not including) the end of the line.
  rule(:comment) do
    str('#') >> match('[^\r\n]').repeat
  end

  # One or more statement terminators: either a newline (optionally preceded
  # by a comment) or a ';', each with optional surrounding space.
  rule(:line_separator) do
    terminator = (comment.maybe >> newline) | str(';')
    (space? >> terminator >> space?).repeat(1)
  end

  rule(:blank) do
    line_separator | space
  end

  rule(:blank?) do
    blank.maybe
  end

  rule(:identifier) do
    match('[a-zA-Z0-9_]').repeat(1)
  end

  # --- res_statement

  # One or two '@' characters followed by an identifier.
  rule(:reference) do
    sigil = str('@').repeat(1, 2)
    (sigil >> identifier).as(:reference)
  end

  # '.name()' or '.name?()'.
  rule(:res_action_or_link) do
    action_name = (identifier >> str('?').maybe).as(:name)
    str('.').as(:dot) >> action_name >> str('()')
  end

  # A reference followed by any number of actions.
  rule(:res_actions) do
    resources = reference.as(:resources)
    actions = res_action_or_link.as(:res_action).repeat(0).as(:res_actions)
    resources >> actions
  end

  # Resource actions with an optional ':field' suffix.
  rule(:res_statement) do
    field = (str(':') >> identifier.as(:name)).maybe.as(:res_field)
    res_actions >> field
  end

  # --- expression

  rule(:expression) do
    res_statement
  end

  # --- body

  # One or more separator-prefixed blocks or expressions, followed by a
  # closing line separator.
  rule(:body) do
    statement = line_separator >> (block | expression)
    statement.repeat(1).as(:body) >> line_separator
  end

  # --- blocks

  # 'begin ... end', optionally prefixed with 'concurrent '.
  rule(:begin_block) do
    prefix = (str('concurrent').as(:type) >> space).maybe.as(:pre)
    prefix >> str('begin').as(:begin) >> body >> str('end')
  end

  # 'define name() ... end'.
  rule(:define_block) do
    str('define').as(:define) >> space >> identifier.as(:name) >> str('()') >> body >> str('end')
  end

  rule(:block) do
    define_block | begin_block
  end

  # --- root

  # A single block with optional line separators around it.
  rule(:radix) do
    line_separator.maybe >> block >> line_separator.maybe
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
# A small example that shows a really small parser and what happens on parser
|
4
|
+
# errors. This is used for documentation purposes to demonstrate basic
|
5
|
+
# parslet functionality and error handling.
|
6
|
+
|
7
|
+
# Documentation example: a tiny parser for runs of 'a' plus helpers that show
# one successful and one failing parse.
module DocumentationExample
  # Parser whose only rule, :a, accepts any number of 'a' characters.
  class MyParser < Parslet::Parser
    rule(:a) do
      str('a').repeat
    end

    # Parses +str+ with the :a rule (this class declares no root rule).
    def parse(str)
      a.parse(str)
    end
  end

  class << self
    # Runs a fresh MyParser over +input+ and returns whatever it returns;
    # a Parslet::ParseFailed raised by the parser propagates to the caller.
    def parse_a_sequence(input)
      MyParser.new.parse(input)
    end

    # Parses an input made only of 'a' characters.
    def demonstrate_success
      parse_a_sequence('aaaa')
    end

    # Parses an input without any 'a' and returns the rescued
    # Parslet::ParseFailed exception instead of raising it.
    def demonstrate_failure
      parse_a_sequence('bbbb')
    rescue Parslet::ParseFailed => failure
      failure
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Example contributed by Hal Brodigan (postmodern). Thanks!
|
4
|
+
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
|
+
require 'parslet'
|
7
|
+
require 'parslet/convenience'
|
8
|
+
|
9
|
+
# Parses e-mail addresses, including obfuscated spellings that use the words
# "at" / "dot" (optionally wrapped in '_' or '-') instead of '@' / '.'.
class EmailParser < Parslet::Parser
  root :email

  rule(:space) do
    match('\s').repeat(1)
  end

  rule(:space?) do
    space.maybe
  end

  # An optional '_' or '-'.
  rule(:dash?) do
    match['_-'].maybe
  end

  # '@', or the word 'at' / 'AT' with optional dashes around it.
  rule(:at) do
    spelled_out = dash? >> (str('at') | str('AT')) >> dash?
    str('@') | spelled_out
  end

  # '.', or the word 'dot' / 'DOT' with optional dashes around it.
  rule(:dot) do
    spelled_out = dash? >> (str('dot') | str('DOT')) >> dash?
    str('.') | spelled_out
  end

  # Lower-case letters and digits, captured as :word, then optional space.
  rule(:word) do
    match('[a-z0-9]').repeat(1).as(:word) >> space?
  end

  # Either a dot (captured as :dot) with optional trailing space, or space.
  rule(:separator) do
    (dot.as(:dot) >> space?) | space
  end

  rule(:words) do
    word >> (separator >> word).repeat
  end

  # username, the 'at' sign, then the remaining words.
  rule(:email) do
    (words.as(:username) >> space? >> at >> space? >> words).as(:email)
  end
end
|
33
|
+
|
34
|
+
# Transform that turns the tree produced by EmailParser back into a plain
# address string.
class EmailSanitizer < Parslet::Transform
  # A word preceded by a dot separator becomes ".word".
  rule(dot: simple(:dot), word: simple(:word)) do
    ".#{word}"
  end

  # A lone word is passed through unchanged.
  rule(word: simple(:word)) do
    word
  end

  # The username (several parts or a single one) gets the '@' appended.
  rule(username: sequence(:username)) do
    "#{username.join}@"
  end

  rule(username: simple(:username)) do
    "#{username}@"
  end

  # All parts of the address are concatenated.
  rule(email: sequence(:email)) do
    email.join
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Example that demonstrates how a simple erb-like parser could be constructed.
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
# Parser for a simple erb-like template: plain text interleaved with
# '<% ... %>' tags holding code, '=' expressions or '#' comments.
class ErbParser < Parslet::Parser
  root :text_with_ruby

  # Everything up to (not including) the next '%>'.
  rule(:ruby) do
    (str('%>').absent? >> any).repeat.as(:ruby)
  end

  rule(:expression) do
    (str('=') >> ruby).as(:expression)
  end

  rule(:comment) do
    (str('#') >> ruby).as(:comment)
  end

  rule(:code) do
    ruby.as(:code)
  end

  # Tag content; expression and comment are tried before plain code.
  rule(:erb) do
    expression | comment | code
  end

  rule(:erb_with_tags) do
    str('<%') >> erb >> str('%>')
  end

  # At least one character, stopping before the next '<%'.
  rule(:text) do
    (str('<%').absent? >> any).repeat(1)
  end

  rule(:text_with_ruby) do
    (text.as(:text) | erb_with_tags).repeat.as(:text)
  end
end
|
19
|
+
|
20
|
+
# Transform that renders the tree produced by ErbParser: comments vanish,
# text is kept, and code / expression tags are evaluated with Kernel#eval.
#
# SECURITY: tag contents are passed straight to eval, so this must only be
# fed templates from a trusted source.
class ErbTransform < Parslet::Transform
  # Comment tags produce no output.
  rule(comment: { ruby: simple(:ruby) }) { '' }

  # A single piece of text is kept as is; several pieces are concatenated.
  rule(text: simple(:text)) { text }
  rule(text: sequence(:texts)) { texts.join }

  # @param binding_context [Binding] binding the template code is evaluated
  #   in; defaults to the binding of this initializer call
  def initialize(binding_context = binding)
    super()
    @erb_binding = binding_context

    # These two rules are registered per instance because their blocks need
    # access to @erb_binding.

    # Code tags are evaluated for their side effects only and render as ''.
    rule(code: { ruby: simple(:ruby) }) do |captures|
      eval(captures[:ruby].to_s, @erb_binding)
      ''
    end

    # Expression tags render as the value of the evaluated code.
    rule(expression: { ruby: simple(:ruby) }) do |captures|
      eval(captures[:ruby].to_s, @erb_binding)
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# This example is heavily inspired by citrus' ip.citrus. Have a look at both
|
2
|
+
# of these to get some choice!
|
3
|
+
|
4
|
+
# The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
|
5
|
+
# Uniform Resource Identifier (URI): Generic Syntax.
|
6
|
+
#
|
7
|
+
# See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
|
8
|
+
|
9
|
+
require 'parslet'
|
10
|
+
|
11
|
+
# IPv4 / IPv6 address grammars, split into two mixin modules and combined in
# IpAddressExample::Parser.
module IpAddressExample
  # Rules for dotted-decimal IPv4 addresses.
  module IPv4
    include Parslet

    # A host identified by an IPv4 literal address is represented in
    # dotted-decimal notation (a sequence of four decimal numbers in the range
    # 0 to 255, separated by "."), as described in [RFC1123] by reference to
    # [RFC0952]. Note that other forms of dotted notation may be interpreted
    # on some platforms, as described in Section 7.4, but only the
    # dotted-decimal form of four octets is allowed by this grammar.
    rule(:ipv4) do
      (
        dec_octet >> str('.') >> dec_octet >> str('.') >>
        dec_octet >> str('.') >> dec_octet
      ).as(:ipv4)
    end

    # A decimal number from 0 to 255; the longer forms are tried first.
    rule(:dec_octet) do
      (str('25') >> match('[0-5]')) |          # 250-255
        (str('2') >> match('[0-4]') >> digit) | # 200-249
        (str('1') >> digit >> digit) |          # 100-199
        (match('[1-9]') >> digit) |             # 10-99
        digit                                   # 0-9
    end

    rule(:digit) do
      match('[0-9]')
    end
  end

  # Rules for IPv6 addresses. Must be used in concert with IPv4, which
  # supplies the +ipv4+ and +digit+ rules referenced here.
  module IPv6
    include Parslet

    rule(:colon) do
      str(':')
    end

    rule(:dcolon) do
      colon >> colon
    end

    # Exactly +times+ repetitions of "h16 :".
    def h16r(times)
      (h16 >> colon).repeat(times, times)
    end

    # Between zero and +times+ repetitions of ": h16".
    def h16l(times)
      (colon >> h16).repeat(0, times)
    end

    # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece
    # is represented numerically in case-insensitive hexadecimal, using one to
    # four hexadecimal digits (leading zeroes are permitted). The eight
    # encoded pieces are given most-significant first, separated by colon
    # characters. Optionally, the least-significant two pieces may instead be
    # represented in IPv4 address textual format. A sequence of one or more
    # consecutive zero-valued 16-bit pieces within the address may be elided,
    # omitting all their digits and leaving exactly two consecutive colons in
    # their place to mark the elision.
    rule(:ipv6) do
      # Alternatives that must be followed by the trailing 32 bits (ls32).
      before_ls32 =
        h16r(6) |
        (dcolon >> h16r(5)) |
        (h16.maybe >> dcolon >> h16r(4)) |
        ((h16 >> h16l(1)).maybe >> dcolon >> h16r(3)) |
        ((h16 >> h16l(2)).maybe >> dcolon >> h16r(2)) |
        ((h16 >> h16l(3)).maybe >> dcolon >> h16r(1)) |
        ((h16 >> h16l(4)).maybe >> dcolon)

      (
        (before_ls32 >> ls32) |
        ((h16 >> h16l(5)).maybe >> dcolon >> h16) |
        ((h16 >> h16l(6)).maybe >> dcolon)
      ).as(:ipv6)
    end

    # One to four hexadecimal digits.
    rule(:h16) do
      hexdigit.repeat(1, 4)
    end

    # The last 32 bits: either "h16:h16" or an embedded IPv4 address.
    rule(:ls32) do
      (h16 >> colon >> h16) | ipv4
    end

    rule(:hexdigit) do
      digit | match('[a-fA-F]')
    end
  end

  # Combines both rule sets and accepts either address family.
  class Parser
    include IPv4
    include IPv6

    # Parses +str+ as an IPv4 address or, failing that, as an IPv6 address.
    def parse(str)
      (ipv4 | ipv6).parse(str)
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
#
|
4
|
+
# MIT License - (c) 2011 John Mettraux
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'parslet' # gem install parslet
|
9
|
+
|
10
|
+
|
11
|
+
# A small JSON reader: MyJson::Parser produces a parslet tree,
# MyJson::Transformer converts that tree into plain Ruby values, and
# MyJson.parse chains the two.
module MyJson

  # Grammar for JSON values. Parsing starts at the :top rule.
  class Parser < Parslet::Parser

    rule(:spaces) { match('\s').repeat(1) }
    rule(:spaces?) { spaces.maybe }

    rule(:comma) { spaces? >> str(',') >> spaces? }
    rule(:digit) { match('[0-9]') }

    # Optional sign, integer part, optional fraction, optional exponent.
    rule(:number) {
      (
        str('-').maybe >> (
          str('0') | (match('[1-9]') >> digit.repeat)
        ) >> (
          str('.') >> digit.repeat(1)
        ).maybe >> (
          match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
        ).maybe
      ).as(:number)
    }

    # Double-quoted text; a backslash takes the following character with it.
    rule(:string) {
      str('"') >> (
        str('\\') >> any | str('"').absent? >> any
      ).repeat.as(:string) >> str('"')
    }

    rule(:array) {
      str('[') >> spaces? >>
      (value >> (comma >> value).repeat).maybe.as(:array) >>
      spaces? >> str(']')
    }

    rule(:object) {
      str('{') >> spaces? >>
      (entry >> (comma >> entry).repeat).maybe.as(:object) >>
      spaces? >> str('}')
    }

    rule(:value) {
      string | number |
      object | array |
      str('true').as(:true) | str('false').as(:false) |
      str('null').as(:null)
    }

    # A "key": value pair inside an object.
    rule(:entry) {
      (
        string.as(:key) >> spaces? >>
        str(':') >> spaces? >>
        value.as(:val)
      ).as(:entry)
    }

    rule(:attribute) { (entry | value).as(:attribute) }

    rule(:top) { spaces? >> value >> spaces? }

    root(:top)
  end

  # Converts the tree produced by Parser into Ruby values.
  class Transformer < Parslet::Transform

    # Intermediate holder for one key/value pair of an object.
    class Entry < Struct.new(:key, :val); end

    # NOTE(review): an empty array and an array holding a single null both
    # appear to reach this rule as nil, so `[]` comes out as `[nil]`. Telling
    # them apart would need a grammar change; behaviour is left as it was.
    rule(:array => subtree(:ar)) {
      ar.is_a?(Array) ? ar : [ ar ]
    }

    # Builds a Hash from the entries of an object. An empty object arrives
    # here as nil (the optional entry list matched nothing) and must yield an
    # empty Hash instead of being treated as a single entry.
    rule(:object => subtree(:ob)) {
      entries =
        case ob
        when nil then []
        when Array then ob
        else [ ob ]
        end
      entries.each_with_object({}) { |e, h| h[e.key] = e.val }
    }

    # The value is bound with subtree rather than simple so that values which
    # are themselves arrays or objects (already transformed into Array / Hash
    # at this point) are accepted as well.
    rule(:entry => { :key => simple(:ke), :val => subtree(:va) }) {
      Entry.new(ke, va)
    }

    rule(:string => simple(:st)) {
      st.to_s
    }

    # Numbers containing a fraction or an exponent become Floats, all others
    # Integers.
    rule(:number => simple(:nb)) {
      nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
    }

    rule(:null => simple(:nu)) { nil }
    rule(:true => simple(:tr)) { true }
    rule(:false => simple(:fa)) { false }
  end

  # Parses the JSON text +s+ and returns the corresponding Ruby value.
  # Errors raised by the parser are not rescued here.
  def self.parse(s)
    parser = Parser.new
    transformer = Transformer.new

    tree = parser.parse(s)
    transformer.apply(tree)
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# An exploration of two ideas:
|
2
|
+
# a) Constructing a whole parser inline, without the artificial class around
|
3
|
+
# it.
|
4
|
+
# and:
|
5
|
+
# b) Constructing non-greedy or non-blind parsers by transforming the
|
6
|
+
# grammar.
|
7
|
+
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
# Parsers built inline from plain method calls, without a Parslet::Parser
# subclass around them.
module LocalExample
  extend Parslet

  class << self
    # Wraps +block+ in a named entity so that a parser can refer to itself
    # lazily.
    def this(name, &block)
      Parslet::Atoms::Entity.new(name, &block)
    end

    # Negative lookahead on +any+.
    def epsilon
      any.absent?
    end

    # Traditional repetition will try as long as the pattern can be matched
    # and then give up. This is greedy and blind.
    def greedy_blind_parser
      recursion = this('a') { greedy_blind_parser }.as(:rec)
      (str('a').as(:e) >> recursion) | epsilon
    end

    # Here's a pattern match that is greedy and non-blind. The first pattern
    # 'a'* will be tried as many times as possible, while still matching the
    # end pattern 'aa'.
    def greedy_non_blind_parser
      recursion = this('b') { greedy_non_blind_parser }.as(:rec)
      (str('aa').as(:e2) >> epsilon) | (str('a').as(:e1) >> recursion)
    end

    # Simple inline parser without class wrapper: an alternative between the
    # literal strings 'aa', 'aaa', 'aaaa', 'aaaaa' and 'aaaaaa', in that
    # order.
    def simple_inline_parser
      (2..6).map { |length| str('a' * length) }.reduce(:|)
    end

    def parse_with_greedy_blind(input)
      greedy_blind_parser.parse(input)
    end

    def parse_with_greedy_non_blind(input)
      greedy_non_blind_parser.parse(input)
    end

    def parse_with_simple_inline(input)
      simple_inline_parser.parse(input)
    end

    # Demonstrates local variable usage in parser construction: atoms stored
    # in locals are combined into 'aa' | 'a' 'aa' | 'a' 'a' 'aa'.
    #
    # @return the combined parser atom
    def demonstrate_local_variables
      single = str('a')
      double = str('aa')

      double | (single >> double) | (single >> single >> double)
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Demonstrates that we have a compatibility fix to mathn's weird idea of
|
2
|
+
# integer mathematics.
|
3
|
+
# This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
|
4
|
+
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
|
+
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
include Parslet
|
10
|
+
|
11
|
+
# Builds a parser for a parenthesized cephalopod name and runs it over an
# input that does not match.
#
# This parse fails, but that is not the point. When mathn is in the current
# ruby environment, it modifies integer division in a way that makes parslet
# loop indefinitely.
#
# @return [nil] when the parse fails; the Parslet::ParseFailed is swallowed
def attempt_parse
  padding = match['\s'].repeat
  animal = str('octopus') | str('squid')
  wrapped_animal = str('(') >> padding >> animal >> padding >> str(')')
  grammar = padding >> wrapped_animal >> padding

  grammar.parse("(\nsqeed)\n")
rescue Parslet::ParseFailed
end
|
36
|
+
|
37
|
+
attempt_parse
puts 'it terminates before we require mathn'

puts "requiring mathn now"
# mathn is only loaded on Rubies older than 2.5. The version is compared
# component by component: stripping the dots and comparing the concatenated
# digits against 250 misclassifies releases with multi-digit components
# (for example "2.4.10" would become 2410).
if (RUBY_VERSION.split('.').map(&:to_i) <=> [2, 5]) < 0
  require 'mathn'
end
puts "and trying again (will hang without the fix)"
attempt_parse # must still terminate once mathn has been loaded
puts "okay!"
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# Reproduces [1] using parslet.
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
|
+
|
4
|
+
require 'pp'
|
5
|
+
require 'parslet'
|
6
|
+
require 'parslet/export'
|
7
|
+
require 'parslet/convenience'
|
8
|
+
|
9
|
+
# A miniature Lisp reader: Parser turns source text into a parslet tree and
# Transform converts that tree into Ruby values.
module MiniLisp
  # Grammar for parenthesized expressions of identifiers, numbers, strings
  # and nested expressions.
  class Parser < Parslet::Parser
    root :expression

    # '(' body ')' with optional whitespace around every token.
    rule(:expression) do
      space? >> str('(') >> space? >> body >> str(')') >> space?
    end

    # Any number of elements, captured as :exp.
    rule(:body) do
      element = expression | identifier | float | integer | string
      element.repeat.as(:exp)
    end

    rule(:space) do
      match('\s').repeat(1)
    end

    rule(:space?) do
      space.maybe
    end

    # Letters, '=' and '*'; '_' is allowed after the first character.
    rule(:identifier) do
      leading = match('[a-zA-Z=*]')
      trailing = match('[a-zA-Z=*_]').repeat
      (leading >> trailing).as(:identifier) >> space?
    end

    # An integer followed by either a '.digits' or an 'edigits' suffix; the
    # suffix is captured as :e.
    rule(:float) do
      suffix = (
        (str('.') >> match('[0-9]').repeat(1)) |
        (str('e') >> match('[0-9]').repeat(1))
      ).as(:e)
      (integer >> suffix).as(:float) >> space?
    end

    # Optionally signed run of digits.
    rule(:integer) do
      sign = (str('+') | str('-')).maybe
      (sign >> match('[0-9]').repeat(1)).as(:integer) >> space?
    end

    # Double-quoted text; a backslash takes the following character with it.
    rule(:string) do
      character = (str('\\') >> any) | (str('"').absent? >> any)
      str('"') >> character.repeat.as(:string) >> str('"') >> space?
    end
  end

  # Wraps a Parslet::Transform configured with the rules for MiniLisp trees.
  class Transform
    include Parslet

    attr_reader :t

    def initialize
      @t = Parslet::Transform.new

      # To understand these, take a look at what comes out of the parser.
      t.rule(identifier: simple(:ident)) { ident.to_sym }
      t.rule(string: simple(:str)) { str }
      t.rule(integer: simple(:int)) { Integer(int) }
      # The integer part and the captured suffix are joined before conversion.
      t.rule(float: { integer: simple(:a), e: simple(:b) }) { Float(a + b) }
      t.rule(exp: subtree(:exp)) { exp }
    end

    # Applies the transformation to +tree+ and returns the result.
    def do(tree)
      t.apply(tree)
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require 'parslet'
|
2
|
+
|
3
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
4
|
+
# ip_address.rb as well.
|
5
|
+
|
6
|
+
# Shows three ways of composing a grammar from separate pieces: a mixin
# module, a nested parser instance and a plain atom passed in as a value.
module ModularityExample
  # Parslet rules are really a special kind of method. Mix them into your
  # classes!
  module ALanguage
    include Parslet

    rule(:a_language) do
      str('aaa')
    end
  end

  # Parslet parsers are parslet atoms as well. Create an instance and chain
  # them to your other rules.
  class BLanguage < Parslet::Parser
    root :blang

    rule(:blang) do
      str('bbb')
    end
  end

  # Combines the three sub-languages; input looks like 'a(aaa)', 'b(bbb)' or
  # 'c(ccc)', optionally followed by a single space.
  class Language < Parslet::Parser
    include ALanguage

    root :root

    # @param c_language the atom used to parse the inside of 'c(...)'
    def initialize(c_language)
      @c_language = c_language
      super()
    end

    rule(:root) do
      a_form = str('a(') >> a_language >> str(')') >> space
      b_form = str('b(') >> BLanguage.new >> str(')') >> space
      c_form = str('c(') >> @c_language >> str(')') >> space

      a_form | b_form | c_form
    end

    rule(:space) do
      str(' ').maybe
    end
  end

  class << self
    # Parslet atoms are really Ruby values, pass them around.
    def c_language
      Parslet.str('ccc')
    end

    def parse_a_language(input = 'a(aaa)')
      create_parser.parse(input)
    end

    def parse_b_language(input = 'b(bbb)')
      create_parser.parse(input)
    end

    def parse_c_language(input = 'c(ccc)')
      create_parser.parse(input)
    end

    # Returns a new Language parser wired up with the c_language atom.
    def create_parser
      Language.new(c_language)
    end
  end
end
|