parslet 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +38 -1
- data/README +33 -21
- data/example/deepest_errors.rb +131 -0
- data/example/email_parser.rb +2 -6
- data/example/ignore.rb +2 -2
- data/example/json.rb +0 -3
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/lib/parslet.rb +65 -51
- data/lib/parslet/atoms.rb +1 -1
- data/lib/parslet/atoms/alternative.rb +11 -12
- data/lib/parslet/atoms/base.rb +57 -99
- data/lib/parslet/atoms/can_flatten.rb +9 -4
- data/lib/parslet/atoms/context.rb +26 -4
- data/lib/parslet/atoms/entity.rb +5 -10
- data/lib/parslet/atoms/lookahead.rb +11 -7
- data/lib/parslet/atoms/named.rb +8 -12
- data/lib/parslet/atoms/re.rb +10 -9
- data/lib/parslet/atoms/repetition.rb +23 -24
- data/lib/parslet/atoms/sequence.rb +10 -16
- data/lib/parslet/atoms/str.rb +11 -13
- data/lib/parslet/cause.rb +45 -13
- data/lib/parslet/convenience.rb +6 -6
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +4 -4
- data/lib/parslet/expression.rb +0 -2
- data/lib/parslet/expression/treetop.rb +2 -2
- data/lib/parslet/parser.rb +2 -6
- data/lib/parslet/pattern.rb +15 -4
- data/lib/parslet/pattern/binding.rb +3 -3
- data/lib/parslet/rig/rspec.rb +2 -2
- data/lib/parslet/slice.rb +0 -6
- data/lib/parslet/source.rb +40 -59
- data/lib/parslet/source/line_cache.rb +2 -2
- data/lib/parslet/transform.rb +13 -7
- data/lib/parslet/transform/context.rb +1 -1
- metadata +69 -26
- data/example/ignore_whitespace.rb +0 -66
- data/lib/parslet/bytecode.rb +0 -6
- data/lib/parslet/bytecode/compiler.rb +0 -138
- data/lib/parslet/bytecode/instructions.rb +0 -358
- data/lib/parslet/bytecode/vm.rb +0 -209
- data/lib/parslet/error_tree.rb +0 -50
data/HISTORY.txt
CHANGED
@@ -3,8 +3,45 @@
|
|
3
3
|
- prsnt? and absnt? are now finally banned into oblivion. Wasting vowels for
|
4
4
|
the win.
|
5
5
|
|
6
|
-
= 1.
|
6
|
+
= 1.4.0 / 25May2012
|
7
7
|
|
8
|
+
+ Revised documentation. A few new API features have finally made it into
|
9
|
+
the documentation. Examples in the documentation are now curated and
|
10
|
+
run against the current code so that they really really work.
|
11
|
+
Also, the website generation tools have been replaced with 2012-style
|
12
|
+
tools. Much less pain to update now.
|
13
|
+
|
14
|
+
+ Parslet::Source no longer holds a StringIO; it directly holds the
|
15
|
+
buffer to be parsed. The api of Source has changed a tiny bit. This change
|
16
|
+
has been made for speed optimisation reasons.
|
17
|
+
|
18
|
+
+ :reporter argument to parse, allowing you to customize error reporting within
|
19
|
+
wide boundaries. See issue #64 for a discussion.
|
20
|
+
Included are two error reporters, one (default) with the existing error
|
21
|
+
tree functionality, one reporting deepest errors as defined by the above
|
22
|
+
ticket.
|
23
|
+
|
24
|
+
+ Optimistic parse: Parsing is two phase, with the first phase assuming
|
25
|
+
there will be no errors. This yields ~ 20% speed improvement in the
|
26
|
+
case where the parse succeeds.
|
27
|
+
Also, internal error handling is now using tuples. This and other
|
28
|
+
optimizations have yielded ~ 30% overall improvement.
|
29
|
+
|
30
|
+
! #error_tree and #cause removed from all of parslet. The
|
31
|
+
Parslet::ParseFailed exception now contains a #cause field that can
|
32
|
+
be asked for an #ascii_tree as before.
|
33
|
+
Cleaner internal error handling, not stateful in atoms anymore. Some
|
34
|
+
parsers will see correct error reporting for the first time. (issue #65)
|
35
|
+
|
36
|
+
+ Made it possible to pass a custom Parslet::Source implementor to #parse.
|
37
|
+
(see #63)
|
38
|
+
|
39
|
+
+ #parse now has a second argument that is an options hash. See
|
40
|
+
Parslet::Atoms::Base#parse for documentation.
|
41
|
+
|
42
|
+
- VM engine on the way out. No benefit except for the intellectual
|
43
|
+
challenge.
|
44
|
+
|
8
45
|
= 1.3.0 / 5Mar2012
|
9
46
|
|
10
47
|
! Parslet::Transform::Context is now much more well-behaved. It has
|
data/README
CHANGED
@@ -18,26 +18,38 @@ SYNOPSIS
|
|
18
18
|
require 'parslet'
|
19
19
|
include Parslet
|
20
20
|
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
#
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
21
|
+
# parslet parses strings
|
22
|
+
str('foo').
|
23
|
+
parse('foo') # => "foo"@0
|
24
|
+
|
25
|
+
# it matches character sets
|
26
|
+
match['abc'].parse('a') # => "a"@0
|
27
|
+
match['abc'].parse('b') # => "b"@0
|
28
|
+
match['abc'].parse('c') # => "c"@0
|
29
|
+
|
30
|
+
# and it annotates its output
|
31
|
+
str('foo').as(:important_bit).
|
32
|
+
parse('foo') # => {:important_bit=>"foo"@0}
|
33
|
+
|
34
|
+
# you can construct parsers with just a few lines
|
35
|
+
quote = str('"')
|
36
|
+
simple_string = quote >> (quote.absent? >> any).repeat >> quote
|
37
|
+
|
38
|
+
simple_string.
|
39
|
+
parse('"Simple Simple Simple"') # => "\"Simple Simple Simple\""@0
|
40
|
+
|
41
|
+
# or by making a fuss about it
|
42
|
+
class Smalltalk < Parslet::Parser
|
43
|
+
root :smalltalk
|
44
|
+
|
45
|
+
rule(:smalltalk) { statements }
|
46
|
+
rule(:statements) {
|
47
|
+
# insert smalltalk parser here (outside of the scope of this readme)
|
48
|
+
}
|
39
49
|
end
|
40
|
-
|
50
|
+
|
51
|
+
# and then
|
52
|
+
Smalltalk.new.parse('smalltalk')
|
41
53
|
|
42
54
|
COMPATIBILITY
|
43
55
|
|
@@ -53,6 +65,6 @@ ruby-1.8.7-p334 for better results.
|
|
53
65
|
|
54
66
|
STATUS
|
55
67
|
|
56
|
-
At version 1.
|
68
|
+
At version 1.4.0 - See HISTORY.txt for changes.
|
57
69
|
|
58
|
-
(c) 2010 Kaspar Schiess
|
70
|
+
(c) 2010, 2011, 2012 Kaspar Schiess
|
data/example/deepest_errors.rb
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
# This example demonstrates how to do deepest error reporting, as invented
|
4
|
+
# by John Mettraux (issue #64).
|
5
|
+
|
6
|
+
require 'parslet'
|
7
|
+
require 'parslet/convenience'
|
8
|
+
|
9
|
+
def prettify(str)
|
10
|
+
puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
|
11
|
+
str.lines.each_with_index do |line, index|
|
12
|
+
printf "%02d %s\n",
|
13
|
+
index+1,
|
14
|
+
line.chomp
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class Parser < Parslet::Parser
|
19
|
+
# commons
|
20
|
+
|
21
|
+
rule(:space) { match('[ \t]').repeat(1) }
|
22
|
+
rule(:space?) { space.maybe }
|
23
|
+
|
24
|
+
rule(:newline) { match('[\r\n]') }
|
25
|
+
|
26
|
+
rule(:comment) { str('#') >> match('[^\r\n]').repeat }
|
27
|
+
|
28
|
+
rule(:line_separator) {
|
29
|
+
(space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
|
30
|
+
}
|
31
|
+
|
32
|
+
rule(:blank) { line_separator | space }
|
33
|
+
rule(:blank?) { blank.maybe }
|
34
|
+
|
35
|
+
rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
|
36
|
+
|
37
|
+
# res_statement
|
38
|
+
|
39
|
+
rule(:reference) {
|
40
|
+
(str('@').repeat(1,2) >> identifier).as(:reference)
|
41
|
+
}
|
42
|
+
|
43
|
+
rule(:res_action_or_link) {
|
44
|
+
str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
|
45
|
+
}
|
46
|
+
|
47
|
+
rule(:res_actions) {
|
48
|
+
(
|
49
|
+
reference
|
50
|
+
).as(:resources) >>
|
51
|
+
(
|
52
|
+
res_action_or_link.as(:res_action)
|
53
|
+
).repeat(0).as(:res_actions)
|
54
|
+
}
|
55
|
+
|
56
|
+
rule(:res_statement) {
|
57
|
+
res_actions >>
|
58
|
+
(str(':') >> identifier.as(:name)).maybe.as(:res_field)
|
59
|
+
}
|
60
|
+
|
61
|
+
# expression
|
62
|
+
|
63
|
+
rule(:expression) {
|
64
|
+
res_statement
|
65
|
+
}
|
66
|
+
|
67
|
+
# body
|
68
|
+
|
69
|
+
rule(:body) {
|
70
|
+
(line_separator >> (block | expression)).repeat(1).as(:body) >>
|
71
|
+
line_separator
|
72
|
+
}
|
73
|
+
|
74
|
+
# blocks
|
75
|
+
|
76
|
+
rule(:begin_block) {
|
77
|
+
(str('concurrent').as(:type) >> space).maybe.as(:pre) >>
|
78
|
+
str('begin').as(:begin) >>
|
79
|
+
body >>
|
80
|
+
str('end')
|
81
|
+
}
|
82
|
+
|
83
|
+
rule(:define_block) {
|
84
|
+
str('define').as(:define) >> space >>
|
85
|
+
identifier.as(:name) >> str('()') >>
|
86
|
+
body >>
|
87
|
+
str('end')
|
88
|
+
}
|
89
|
+
|
90
|
+
rule(:block) {
|
91
|
+
define_block | begin_block
|
92
|
+
}
|
93
|
+
|
94
|
+
# root
|
95
|
+
|
96
|
+
rule(:radix) {
|
97
|
+
line_separator.maybe >> block >> line_separator.maybe
|
98
|
+
}
|
99
|
+
|
100
|
+
root(:radix)
|
101
|
+
end
|
102
|
+
|
103
|
+
ds = [
|
104
|
+
%{
|
105
|
+
define f()
|
106
|
+
@res.name
|
107
|
+
end
|
108
|
+
},
|
109
|
+
%{
|
110
|
+
define f()
|
111
|
+
begin
|
112
|
+
@res.name
|
113
|
+
end
|
114
|
+
end
|
115
|
+
}
|
116
|
+
]
|
117
|
+
|
118
|
+
ds.each do |d|
|
119
|
+
|
120
|
+
puts '-' * 80
|
121
|
+
prettify(d)
|
122
|
+
|
123
|
+
parser = Parser.new
|
124
|
+
|
125
|
+
begin
|
126
|
+
parser.parse_with_debug(d,
|
127
|
+
:reporter => Parslet::ErrorReporter::Deepest.new)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
puts '-' * 80
|
data/example/email_parser.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
6
|
require 'parslet'
|
7
|
+
require 'parslet/convenience'
|
7
8
|
|
8
9
|
class EmailParser < Parslet::Parser
|
9
10
|
rule(:space) { match('\s').repeat(1) }
|
@@ -48,9 +49,4 @@ unless ARGV[0]
|
|
48
49
|
STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
|
49
50
|
end
|
50
51
|
|
51
|
-
|
52
|
-
p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
|
53
|
-
rescue Parslet::ParseFailed => error
|
54
|
-
puts error
|
55
|
-
puts parser.error_tree
|
56
|
-
end
|
52
|
+
p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com'))
|
data/example/ignore.rb
CHANGED
@@ -11,9 +11,9 @@ class IgnoreParslet < Parslet::Atoms::Base
|
|
11
11
|
@parslet.to_s(prec)
|
12
12
|
end
|
13
13
|
def try(source, context)
|
14
|
-
result = @parslet.try(source, context)
|
14
|
+
success, value = result = @parslet.try(source, context)
|
15
15
|
|
16
|
-
return
|
16
|
+
return succ(nil) if success
|
17
17
|
return result
|
18
18
|
end
|
19
19
|
|
data/example/json.rb
CHANGED
data/example/modularity.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require "parslet"
|
5
|
+
|
6
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
7
|
+
# ip_address.rb as well.
|
8
|
+
|
9
|
+
module ALanguage
|
10
|
+
include Parslet
|
11
|
+
|
12
|
+
# Parslet rules are really a special kind of method. Mix them into your
|
13
|
+
# classes!
|
14
|
+
rule(:a_language) { str('aaa') }
|
15
|
+
end
|
16
|
+
|
17
|
+
# Parslet parsers are parslet atoms as well. Create an instance and chain them
|
18
|
+
# to your other rules.
|
19
|
+
#
|
20
|
+
class BLanguage < Parslet::Parser
|
21
|
+
root :blang
|
22
|
+
|
23
|
+
rule(:blang) { str('bbb') }
|
24
|
+
end
|
25
|
+
|
26
|
+
# Parslet atoms are really Ruby values, pass them around.
|
27
|
+
c_language = Parslet.str('ccc')
|
28
|
+
|
29
|
+
class Language < Parslet::Parser
|
30
|
+
def initialize(c_language)
|
31
|
+
@c_language = c_language
|
32
|
+
super()
|
33
|
+
end
|
34
|
+
|
35
|
+
root :root
|
36
|
+
|
37
|
+
include ALanguage
|
38
|
+
|
39
|
+
rule(:root) { str('a(') >> a_language >> str(')') >> space |
|
40
|
+
str('b(') >> BLanguage.new >> str(')') >> space |
|
41
|
+
str('c(') >> @c_language >> str(')') >> space }
|
42
|
+
rule(:space) { str(' ').maybe }
|
43
|
+
end
|
44
|
+
|
45
|
+
Language.new(c_language).parse('a(aaa)')
|
46
|
+
Language.new(c_language).parse('b(bbb)')
|
47
|
+
Language.new(c_language).parse('c(ccc)')
|
data/example/nested_errors.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
require 'parslet/convenience'
|
5
|
+
|
6
|
+
# This example demonstrates tree error reporting in a real life example.
|
7
|
+
# The parser code has been contributed by John Mettraux.
|
8
|
+
|
9
|
+
def prettify(str)
|
10
|
+
puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
|
11
|
+
str.lines.each_with_index do |line, index|
|
12
|
+
printf "%02d %s\n",
|
13
|
+
index+1,
|
14
|
+
line.chomp
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class Parser < Parslet::Parser
|
19
|
+
|
20
|
+
# commons
|
21
|
+
|
22
|
+
rule(:space) { match('[ \t]').repeat(1) }
|
23
|
+
rule(:space?) { space.maybe }
|
24
|
+
|
25
|
+
rule(:newline) { match('[\r\n]') }
|
26
|
+
|
27
|
+
rule(:comment) { str('#') >> match('[^\r\n]').repeat }
|
28
|
+
|
29
|
+
rule(:line_separator) {
|
30
|
+
(space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
|
31
|
+
}
|
32
|
+
|
33
|
+
rule(:blank) { line_separator | space }
|
34
|
+
rule(:blank?) { blank.maybe }
|
35
|
+
|
36
|
+
rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
|
37
|
+
|
38
|
+
# res_statement
|
39
|
+
|
40
|
+
rule(:reference) {
|
41
|
+
(str('@').repeat(1,2) >> identifier).as(:reference)
|
42
|
+
}
|
43
|
+
|
44
|
+
rule(:res_action_or_link) {
|
45
|
+
str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
|
46
|
+
}
|
47
|
+
|
48
|
+
rule(:res_actions) {
|
49
|
+
(
|
50
|
+
reference
|
51
|
+
).as(:resources) >>
|
52
|
+
(
|
53
|
+
res_action_or_link.as(:res_action)
|
54
|
+
).repeat(0).as(:res_actions)
|
55
|
+
}
|
56
|
+
|
57
|
+
rule(:res_statement) {
|
58
|
+
res_actions >>
|
59
|
+
(str(':') >> identifier.as(:name)).maybe.as(:res_field)
|
60
|
+
}
|
61
|
+
|
62
|
+
# expression
|
63
|
+
|
64
|
+
rule(:expression) {
|
65
|
+
res_statement
|
66
|
+
}
|
67
|
+
|
68
|
+
# body
|
69
|
+
|
70
|
+
rule(:body) {
|
71
|
+
(line_separator >> (block | expression)).repeat(1).as(:body) >>
|
72
|
+
line_separator
|
73
|
+
}
|
74
|
+
|
75
|
+
# blocks
|
76
|
+
|
77
|
+
rule(:begin_block) {
|
78
|
+
(str('concurrent').as(:type) >> space).maybe.as(:pre) >>
|
79
|
+
str('begin').as(:begin) >>
|
80
|
+
body >>
|
81
|
+
str('end')
|
82
|
+
}
|
83
|
+
|
84
|
+
rule(:define_block) {
|
85
|
+
str('define').as(:define) >> space >>
|
86
|
+
identifier.as(:name) >> str('()') >>
|
87
|
+
body >>
|
88
|
+
str('end')
|
89
|
+
}
|
90
|
+
|
91
|
+
rule(:block) {
|
92
|
+
define_block | begin_block
|
93
|
+
}
|
94
|
+
|
95
|
+
# root
|
96
|
+
|
97
|
+
rule(:radix) {
|
98
|
+
line_separator.maybe >> block >> line_separator.maybe
|
99
|
+
}
|
100
|
+
|
101
|
+
root(:radix)
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
ds = [
|
106
|
+
%{
|
107
|
+
define f()
|
108
|
+
@res.name
|
109
|
+
end
|
110
|
+
},
|
111
|
+
%{
|
112
|
+
define f()
|
113
|
+
begin
|
114
|
+
@res.name
|
115
|
+
end
|
116
|
+
end
|
117
|
+
}
|
118
|
+
]
|
119
|
+
|
120
|
+
ds.each do |d|
|
121
|
+
|
122
|
+
puts '-' * 80
|
123
|
+
prettify(d)
|
124
|
+
|
125
|
+
parser = Parser.new
|
126
|
+
|
127
|
+
begin
|
128
|
+
parser.parse_with_debug(d)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
|
132
|
+
puts '-' * 80
|