parslet 1.3.0 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
data/HISTORY.txt
CHANGED
@@ -3,8 +3,45 @@
|
|
3
3
|
- prsnt? and absnt? are now finally banned into oblivion. Wasting vowels for
|
4
4
|
the win.
|
5
5
|
|
6
|
-
= 1.
|
6
|
+
= 1.4.0 / 25May2012
|
7
7
|
|
8
|
+
+ Revised documentation. A few new API features have finally made it into
|
9
|
+
the documentation. Examples in the documentation are now curated and
|
10
|
+
run against the current code so that they really really work.
|
11
|
+
Also, the website generation tools have been replaced with 2012-style
|
12
|
+
tools. Much less pain to update now.
|
13
|
+
|
14
|
+
+ Parslet::Source no longer holds a StringIO; it directly holds the
|
15
|
+
buffer to be parsed. The api of Source has changed a tiny bit. This change
|
16
|
+
has been made for speed optimisation reasons.
|
17
|
+
|
18
|
+
+ :reporter argument to parse, allowing error reporting to be customized within
|
19
|
+
wide boundaries. See issue #64 for a discussion.
|
20
|
+
Included are two error reporters, one (default) with the existing error
|
21
|
+
tree functionality, one reporting deepest errors as defined by the above
|
22
|
+
ticket.
|
23
|
+
|
24
|
+
+ Optimistic parse: Parsing is two-phase, with the first phase assuming
|
25
|
+
there will be no errors. This yields ~ 20% speed improvement in the
|
26
|
+
case where the parse succeeds.
|
27
|
+
Also, internal error handling is now using tuples. This and other
|
28
|
+
optimizations have yielded ~ 30% overall improvement.
|
29
|
+
|
30
|
+
! #error_tree and #cause removed from all of parslet. The
|
31
|
+
Parslet::ParseFailed exception now contains a #cause field that can
|
32
|
+
be asked for an #ascii_tree as before.
|
33
|
+
Cleaner internal error handling, not stateful in atoms anymore. Some
|
34
|
+
parsers will see correct error reporting for the first time. (issue #65)
|
35
|
+
|
36
|
+
+ Made it possible to pass a custom Parslet::Source implementor to #parse.
|
37
|
+
(see #63)
|
38
|
+
|
39
|
+
+ #parse now has a second argument that is an options hash. See
|
40
|
+
Parslet::Atoms::Base#parse for documentation.
|
41
|
+
|
42
|
+
- VM engine on the way out. No benefit except for the intellectual
|
43
|
+
challenge.
|
44
|
+
|
8
45
|
= 1.3.0 / 5Mar2012
|
9
46
|
|
10
47
|
! Parslet::Transform::Context is now much more well-behaved. It has
|
data/README
CHANGED
@@ -18,26 +18,38 @@ SYNOPSIS
|
|
18
18
|
require 'parslet'
|
19
19
|
include Parslet
|
20
20
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
21
|
+
# parslet parses strings
|
22
|
+
str('foo').
|
23
|
+
parse('foo') # => "foo"@0
|
24
|
+
|
25
|
+
# it matches character sets
|
26
|
+
match['abc'].parse('a') # => "a"@0
|
27
|
+
match['abc'].parse('b') # => "b"@0
|
28
|
+
match['abc'].parse('c') # => "c"@0
|
29
|
+
|
30
|
+
# and it annotates its output
|
31
|
+
str('foo').as(:important_bit).
|
32
|
+
parse('foo') # => {:important_bit=>"foo"@0}
|
33
|
+
|
34
|
+
# you can construct parsers with just a few lines
|
35
|
+
quote = str('"')
|
36
|
+
simple_string = quote >> (quote.absent? >> any).repeat >> quote
|
37
|
+
|
38
|
+
simple_string.
|
39
|
+
parse('"Simple Simple Simple"') # => "\"Simple Simple Simple\""@0
|
40
|
+
|
41
|
+
# or by making a fuss about it
|
42
|
+
class Smalltalk < Parslet::Parser
|
43
|
+
root :smalltalk
|
44
|
+
|
45
|
+
rule(:smalltalk) { statements }
|
46
|
+
rule(:statements) {
|
47
|
+
# insert smalltalk parser here (outside of the scope of this readme)
|
48
|
+
}
|
39
49
|
end
|
40
|
-
|
50
|
+
|
51
|
+
# and then
|
52
|
+
Smalltalk.new.parse('smalltalk')
|
41
53
|
|
42
54
|
COMPATIBILITY
|
43
55
|
|
@@ -53,6 +65,6 @@ ruby-1.8.7-p334 for better results.
|
|
53
65
|
|
54
66
|
STATUS
|
55
67
|
|
56
|
-
At version 1.
|
68
|
+
At version 1.4.0 - See HISTORY.txt for changes.
|
57
69
|
|
58
|
-
(c) 2010 Kaspar Schiess
|
70
|
+
(c) 2010, 2011, 2012 Kaspar Schiess
|
@@ -0,0 +1,131 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
# This example demonstrates how to do deepest error reporting, as invented
|
4
|
+
# by John Mettraux (issue #64).
|
5
|
+
|
6
|
+
require 'parslet'
|
7
|
+
require 'parslet/convenience'
|
8
|
+
|
9
|
+
def prettify(str)
|
10
|
+
puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
|
11
|
+
str.lines.each_with_index do |line, index|
|
12
|
+
printf "%02d %s\n",
|
13
|
+
index+1,
|
14
|
+
line.chomp
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class Parser < Parslet::Parser
|
19
|
+
# commons
|
20
|
+
|
21
|
+
rule(:space) { match('[ \t]').repeat(1) }
|
22
|
+
rule(:space?) { space.maybe }
|
23
|
+
|
24
|
+
rule(:newline) { match('[\r\n]') }
|
25
|
+
|
26
|
+
rule(:comment) { str('#') >> match('[^\r\n]').repeat }
|
27
|
+
|
28
|
+
rule(:line_separator) {
|
29
|
+
(space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
|
30
|
+
}
|
31
|
+
|
32
|
+
rule(:blank) { line_separator | space }
|
33
|
+
rule(:blank?) { blank.maybe }
|
34
|
+
|
35
|
+
rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
|
36
|
+
|
37
|
+
# res_statement
|
38
|
+
|
39
|
+
rule(:reference) {
|
40
|
+
(str('@').repeat(1,2) >> identifier).as(:reference)
|
41
|
+
}
|
42
|
+
|
43
|
+
rule(:res_action_or_link) {
|
44
|
+
str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
|
45
|
+
}
|
46
|
+
|
47
|
+
rule(:res_actions) {
|
48
|
+
(
|
49
|
+
reference
|
50
|
+
).as(:resources) >>
|
51
|
+
(
|
52
|
+
res_action_or_link.as(:res_action)
|
53
|
+
).repeat(0).as(:res_actions)
|
54
|
+
}
|
55
|
+
|
56
|
+
rule(:res_statement) {
|
57
|
+
res_actions >>
|
58
|
+
(str(':') >> identifier.as(:name)).maybe.as(:res_field)
|
59
|
+
}
|
60
|
+
|
61
|
+
# expression
|
62
|
+
|
63
|
+
rule(:expression) {
|
64
|
+
res_statement
|
65
|
+
}
|
66
|
+
|
67
|
+
# body
|
68
|
+
|
69
|
+
rule(:body) {
|
70
|
+
(line_separator >> (block | expression)).repeat(1).as(:body) >>
|
71
|
+
line_separator
|
72
|
+
}
|
73
|
+
|
74
|
+
# blocks
|
75
|
+
|
76
|
+
rule(:begin_block) {
|
77
|
+
(str('concurrent').as(:type) >> space).maybe.as(:pre) >>
|
78
|
+
str('begin').as(:begin) >>
|
79
|
+
body >>
|
80
|
+
str('end')
|
81
|
+
}
|
82
|
+
|
83
|
+
rule(:define_block) {
|
84
|
+
str('define').as(:define) >> space >>
|
85
|
+
identifier.as(:name) >> str('()') >>
|
86
|
+
body >>
|
87
|
+
str('end')
|
88
|
+
}
|
89
|
+
|
90
|
+
rule(:block) {
|
91
|
+
define_block | begin_block
|
92
|
+
}
|
93
|
+
|
94
|
+
# root
|
95
|
+
|
96
|
+
rule(:radix) {
|
97
|
+
line_separator.maybe >> block >> line_separator.maybe
|
98
|
+
}
|
99
|
+
|
100
|
+
root(:radix)
|
101
|
+
end
|
102
|
+
|
103
|
+
ds = [
|
104
|
+
%{
|
105
|
+
define f()
|
106
|
+
@res.name
|
107
|
+
end
|
108
|
+
},
|
109
|
+
%{
|
110
|
+
define f()
|
111
|
+
begin
|
112
|
+
@res.name
|
113
|
+
end
|
114
|
+
end
|
115
|
+
}
|
116
|
+
]
|
117
|
+
|
118
|
+
ds.each do |d|
|
119
|
+
|
120
|
+
puts '-' * 80
|
121
|
+
prettify(d)
|
122
|
+
|
123
|
+
parser = Parser.new
|
124
|
+
|
125
|
+
begin
|
126
|
+
parser.parse_with_debug(d,
|
127
|
+
:reporter => Parslet::ErrorReporter::Deepest.new)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
puts '-' * 80
|
data/example/email_parser.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
6
|
require 'parslet'
|
7
|
+
require 'parslet/convenience'
|
7
8
|
|
8
9
|
class EmailParser < Parslet::Parser
|
9
10
|
rule(:space) { match('\s').repeat(1) }
|
@@ -48,9 +49,4 @@ unless ARGV[0]
|
|
48
49
|
STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
|
49
50
|
end
|
50
51
|
|
51
|
-
|
52
|
-
p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
|
53
|
-
rescue Parslet::ParseFailed => error
|
54
|
-
puts error
|
55
|
-
puts parser.error_tree
|
56
|
-
end
|
52
|
+
p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com'))
|
data/example/ignore.rb
CHANGED
@@ -11,9 +11,9 @@ class IgnoreParslet < Parslet::Atoms::Base
|
|
11
11
|
@parslet.to_s(prec)
|
12
12
|
end
|
13
13
|
def try(source, context)
|
14
|
-
result = @parslet.try(source, context)
|
14
|
+
success, value = result = @parslet.try(source, context)
|
15
15
|
|
16
|
-
return
|
16
|
+
return succ(nil) if success
|
17
17
|
return result
|
18
18
|
end
|
19
19
|
|
data/example/json.rb
CHANGED
@@ -0,0 +1,47 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require "parslet"
|
5
|
+
|
6
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
7
|
+
# ip_address.rb as well.
|
8
|
+
|
9
|
+
module ALanguage
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
# Parslet rules are really a special kind of method. Mix them into your
|
13
|
+
# classes!
|
14
|
+
rule(:a_language) { str('aaa') }
|
15
|
+
end
|
16
|
+
|
17
|
+
# Parslet parsers are parslet atoms as well. Create an instance and chain them
|
18
|
+
# to your other rules.
|
19
|
+
#
|
20
|
+
class BLanguage < Parslet::Parser
|
21
|
+
root :blang
|
22
|
+
|
23
|
+
rule(:blang) { str('bbb') }
|
24
|
+
end
|
25
|
+
|
26
|
+
# Parslet atoms are really Ruby values, pass them around.
|
27
|
+
c_language = Parslet.str('ccc')
|
28
|
+
|
29
|
+
class Language < Parslet::Parser
|
30
|
+
def initialize(c_language)
|
31
|
+
@c_language = c_language
|
32
|
+
super()
|
33
|
+
end
|
34
|
+
|
35
|
+
root :root
|
36
|
+
|
37
|
+
include ALanguage
|
38
|
+
|
39
|
+
rule(:root) { str('a(') >> a_language >> str(')') >> space |
|
40
|
+
str('b(') >> BLanguage.new >> str(')') >> space |
|
41
|
+
str('c(') >> @c_language >> str(')') >> space }
|
42
|
+
rule(:space) { str(' ').maybe }
|
43
|
+
end
|
44
|
+
|
45
|
+
Language.new(c_language).parse('a(aaa)')
|
46
|
+
Language.new(c_language).parse('b(bbb)')
|
47
|
+
Language.new(c_language).parse('c(ccc)')
|
@@ -0,0 +1,132 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
require 'parslet/convenience'
|
5
|
+
|
6
|
+
# This example demonstrates tree error reporting in a real life example.
|
7
|
+
# The parser code has been contributed by John Mettraux.
|
8
|
+
|
9
|
+
def prettify(str)
|
10
|
+
puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
|
11
|
+
str.lines.each_with_index do |line, index|
|
12
|
+
printf "%02d %s\n",
|
13
|
+
index+1,
|
14
|
+
line.chomp
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class Parser < Parslet::Parser
|
19
|
+
|
20
|
+
# commons
|
21
|
+
|
22
|
+
rule(:space) { match('[ \t]').repeat(1) }
|
23
|
+
rule(:space?) { space.maybe }
|
24
|
+
|
25
|
+
rule(:newline) { match('[\r\n]') }
|
26
|
+
|
27
|
+
rule(:comment) { str('#') >> match('[^\r\n]').repeat }
|
28
|
+
|
29
|
+
rule(:line_separator) {
|
30
|
+
(space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
|
31
|
+
}
|
32
|
+
|
33
|
+
rule(:blank) { line_separator | space }
|
34
|
+
rule(:blank?) { blank.maybe }
|
35
|
+
|
36
|
+
rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
|
37
|
+
|
38
|
+
# res_statement
|
39
|
+
|
40
|
+
rule(:reference) {
|
41
|
+
(str('@').repeat(1,2) >> identifier).as(:reference)
|
42
|
+
}
|
43
|
+
|
44
|
+
rule(:res_action_or_link) {
|
45
|
+
str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
|
46
|
+
}
|
47
|
+
|
48
|
+
rule(:res_actions) {
|
49
|
+
(
|
50
|
+
reference
|
51
|
+
).as(:resources) >>
|
52
|
+
(
|
53
|
+
res_action_or_link.as(:res_action)
|
54
|
+
).repeat(0).as(:res_actions)
|
55
|
+
}
|
56
|
+
|
57
|
+
rule(:res_statement) {
|
58
|
+
res_actions >>
|
59
|
+
(str(':') >> identifier.as(:name)).maybe.as(:res_field)
|
60
|
+
}
|
61
|
+
|
62
|
+
# expression
|
63
|
+
|
64
|
+
rule(:expression) {
|
65
|
+
res_statement
|
66
|
+
}
|
67
|
+
|
68
|
+
# body
|
69
|
+
|
70
|
+
rule(:body) {
|
71
|
+
(line_separator >> (block | expression)).repeat(1).as(:body) >>
|
72
|
+
line_separator
|
73
|
+
}
|
74
|
+
|
75
|
+
# blocks
|
76
|
+
|
77
|
+
rule(:begin_block) {
|
78
|
+
(str('concurrent').as(:type) >> space).maybe.as(:pre) >>
|
79
|
+
str('begin').as(:begin) >>
|
80
|
+
body >>
|
81
|
+
str('end')
|
82
|
+
}
|
83
|
+
|
84
|
+
rule(:define_block) {
|
85
|
+
str('define').as(:define) >> space >>
|
86
|
+
identifier.as(:name) >> str('()') >>
|
87
|
+
body >>
|
88
|
+
str('end')
|
89
|
+
}
|
90
|
+
|
91
|
+
rule(:block) {
|
92
|
+
define_block | begin_block
|
93
|
+
}
|
94
|
+
|
95
|
+
# root
|
96
|
+
|
97
|
+
rule(:radix) {
|
98
|
+
line_separator.maybe >> block >> line_separator.maybe
|
99
|
+
}
|
100
|
+
|
101
|
+
root(:radix)
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
ds = [
|
106
|
+
%{
|
107
|
+
define f()
|
108
|
+
@res.name
|
109
|
+
end
|
110
|
+
},
|
111
|
+
%{
|
112
|
+
define f()
|
113
|
+
begin
|
114
|
+
@res.name
|
115
|
+
end
|
116
|
+
end
|
117
|
+
}
|
118
|
+
]
|
119
|
+
|
120
|
+
ds.each do |d|
|
121
|
+
|
122
|
+
puts '-' * 80
|
123
|
+
prettify(d)
|
124
|
+
|
125
|
+
parser = Parser.new
|
126
|
+
|
127
|
+
begin
|
128
|
+
parser.parse_with_debug(d)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
puts '-' * 80
|