parslet 1.3.0 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
data/HISTORY.txt CHANGED
@@ -3,8 +3,45 @@
3
3
  - prsnt? and absnt? are now finally banned into oblivion. Wasting vowels for
4
4
  the win.
5
5
 
6
- = 1.3.1 / ???
6
+ = 1.4.0 / 25May2012
7
7
 
8
+ + Revised documentation. A few new API features have finally made it into
9
+ the documentation. Examples in the documentation are now curated and
10
+ run against the current code so that they really really work.
11
+ Also, the website generation tools have been replaced with 2012-style
12
+ tools. Much less pain to update now.
13
+
14
+ + Parslet::Source no longer holds a StringIO; it directly holds the
15
+ buffer to be parsed. The api of Source has changed a tiny bit. This change
16
+ has been made for speed optimisation reasons.
17
+
18
+ + :reporter argument to parse, allowing you to customize error reporting within
19
+ wide boundaries. See issue #64 for a discussion.
20
+ Included are two error reporters, one (default) with the existing error
21
+ tree functionality, one reporting deepest errors as defined by the above
22
+ ticket.
23
+
24
+ + Optimistic parse: Parsing is two phase, with the first phase assuming
25
+ there will be no errors. This yields ~ 20% speed improvement in the
26
+ case where the parse succeeds.
27
+ Also, internal error handling is now using tuples. This and other
28
+ optimizations have yielded ~ 30% overall improvement.
29
+
30
+ ! #error_tree and #cause removed from all of parslet. The
31
+ Parslet::ParseFailed exception now contains a #cause field that can
32
+ be asked for an #ascii_tree as before.
33
+ Cleaner internal error handling, not stateful in atoms anymore. Some
34
+ parsers will see correct error reporting for the first time. (issue #65)
35
+
36
+ + Made it possible to pass a custom Parslet::Source implementor to #parse.
37
+ (see #63)
38
+
39
+ + #parse now has a second argument that is an options hash. See
40
+ Parslet::Atoms::Base#parse for documentation.
41
+
42
+ - VM engine on the way out. No benefit except for the intellectual
43
+ challenge.
44
+
8
45
  = 1.3.0 / 5Mar2012
9
46
 
10
47
  ! Parslet::Transform::Context is now much more well-behaved. It has
data/README CHANGED
@@ -18,26 +18,38 @@ SYNOPSIS
18
18
  require 'parslet'
19
19
  include Parslet
20
20
 
21
- # Constructs a parser using a Parser Expression Grammar like DSL:
22
- parser = str('"') >>
23
- (
24
- str('\\') >> any |
25
- str('"').absnt? >> any
26
- ).repeat.as(:string) >>
27
- str('"')
28
-
29
- # Parse the string and capture parts of the interpretation (:string above)
30
- tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
31
-
32
- tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
33
-
34
- # Here's how you can grab results from that tree:
35
-
36
- transform = Parslet::Transform.new do
37
- rule(:string => simple(:x)) {
38
- puts "String contents: #{x}" }
21
+ # parslet parses strings
22
+ str('foo').
23
+ parse('foo') # => "foo"@0
24
+
25
+ # it matches character sets
26
+ match['abc'].parse('a') # => "a"@0
27
+ match['abc'].parse('b') # => "b"@0
28
+ match['abc'].parse('c') # => "c"@0
29
+
30
+ # and it annotates its output
31
+ str('foo').as(:important_bit).
32
+ parse('foo') # => {:important_bit=>"foo"@0}
33
+
34
+ # you can construct parsers with just a few lines
35
+ quote = str('"')
36
+ simple_string = quote >> (quote.absent? >> any).repeat >> quote
37
+
38
+ simple_string.
39
+ parse('"Simple Simple Simple"') # => "\"Simple Simple Simple\""@0
40
+
41
+ # or by making a fuss about it
42
+ class Smalltalk < Parslet::Parser
43
+ root :smalltalk
44
+
45
+ rule(:smalltalk) { statements }
46
+ rule(:statements) {
47
+ # insert smalltalk parser here (outside of the scope of this readme)
48
+ }
39
49
  end
40
- transform.apply(tree)
50
+
51
+ # and then
52
+ Smalltalk.new.parse('smalltalk')
41
53
 
42
54
  COMPATIBILITY
43
55
 
@@ -53,6 +65,6 @@ ruby-1.8.7-p334 for better results.
53
65
 
54
66
  STATUS
55
67
 
56
- At version 1.3.0 - See HISTORY.txt for changes.
68
+ At version 1.4.0 - See HISTORY.txt for changes.
57
69
 
58
- (c) 2010 Kaspar Schiess
70
+ (c) 2010, 2011, 2012 Kaspar Schiess
@@ -0,0 +1,131 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ # This example demonstrates how to do deepest error reporting, as invented
4
+ # by John Mettraux (issue #64).
5
+
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+ # commons
20
+
21
+ rule(:space) { match('[ \t]').repeat(1) }
22
+ rule(:space?) { space.maybe }
23
+
24
+ rule(:newline) { match('[\r\n]') }
25
+
26
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
27
+
28
+ rule(:line_separator) {
29
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
30
+ }
31
+
32
+ rule(:blank) { line_separator | space }
33
+ rule(:blank?) { blank.maybe }
34
+
35
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
36
+
37
+ # res_statement
38
+
39
+ rule(:reference) {
40
+ (str('@').repeat(1,2) >> identifier).as(:reference)
41
+ }
42
+
43
+ rule(:res_action_or_link) {
44
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
45
+ }
46
+
47
+ rule(:res_actions) {
48
+ (
49
+ reference
50
+ ).as(:resources) >>
51
+ (
52
+ res_action_or_link.as(:res_action)
53
+ ).repeat(0).as(:res_actions)
54
+ }
55
+
56
+ rule(:res_statement) {
57
+ res_actions >>
58
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
59
+ }
60
+
61
+ # expression
62
+
63
+ rule(:expression) {
64
+ res_statement
65
+ }
66
+
67
+ # body
68
+
69
+ rule(:body) {
70
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
71
+ line_separator
72
+ }
73
+
74
+ # blocks
75
+
76
+ rule(:begin_block) {
77
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
78
+ str('begin').as(:begin) >>
79
+ body >>
80
+ str('end')
81
+ }
82
+
83
+ rule(:define_block) {
84
+ str('define').as(:define) >> space >>
85
+ identifier.as(:name) >> str('()') >>
86
+ body >>
87
+ str('end')
88
+ }
89
+
90
+ rule(:block) {
91
+ define_block | begin_block
92
+ }
93
+
94
+ # root
95
+
96
+ rule(:radix) {
97
+ line_separator.maybe >> block >> line_separator.maybe
98
+ }
99
+
100
+ root(:radix)
101
+ end
102
+
103
+ ds = [
104
+ %{
105
+ define f()
106
+ @res.name
107
+ end
108
+ },
109
+ %{
110
+ define f()
111
+ begin
112
+ @res.name
113
+ end
114
+ end
115
+ }
116
+ ]
117
+
118
+ ds.each do |d|
119
+
120
+ puts '-' * 80
121
+ prettify(d)
122
+
123
+ parser = Parser.new
124
+
125
+ begin
126
+ parser.parse_with_debug(d,
127
+ :reporter => Parslet::ErrorReporter::Deepest.new)
128
+ end
129
+ end
130
+
131
+ puts '-' * 80
@@ -4,6 +4,7 @@
4
4
 
5
5
  $:.unshift File.dirname(__FILE__) + "/../lib"
6
6
  require 'parslet'
7
+ require 'parslet/convenience'
7
8
 
8
9
  class EmailParser < Parslet::Parser
9
10
  rule(:space) { match('\s').repeat(1) }
@@ -48,9 +49,4 @@ unless ARGV[0]
48
49
  STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
49
50
  end
50
51
 
51
- begin
52
- p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
53
- rescue Parslet::ParseFailed => error
54
- puts error
55
- puts parser.error_tree
56
- end
52
+ p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com'))
data/example/ignore.rb CHANGED
@@ -11,9 +11,9 @@ class IgnoreParslet < Parslet::Atoms::Base
11
11
  @parslet.to_s(prec)
12
12
  end
13
13
  def try(source, context)
14
- result = @parslet.try(source, context)
14
+ success, value = result = @parslet.try(source, context)
15
15
 
16
- return success(nil) unless result.error?
16
+ return succ(nil) if success
17
17
  return result
18
18
  end
19
19
 
data/example/json.rb CHANGED
@@ -107,9 +107,6 @@ module MyJson
107
107
  out = transformer.apply(tree)
108
108
 
109
109
  out
110
-
111
- rescue Parslet::ParseFailed => e
112
- puts e, parser.root.error_tree
113
110
  end
114
111
  end
115
112
 
@@ -0,0 +1,47 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'pp'
4
+ require "parslet"
5
+
6
+ # Demonstrates modular parsers, split out over many classes. Please look at
7
+ # ip_address.rb as well.
8
+
9
+ module ALanguage
10
+ include Parslet
11
+
12
+ # Parslet rules are really a special kind of method. Mix them into your
13
+ # classes!
14
+ rule(:a_language) { str('aaa') }
15
+ end
16
+
17
+ # Parslet parsers are parslet atoms as well. Create an instance and chain them
18
+ # to your other rules.
19
+ #
20
+ class BLanguage < Parslet::Parser
21
+ root :blang
22
+
23
+ rule(:blang) { str('bbb') }
24
+ end
25
+
26
+ # Parslet atoms are really Ruby values, pass them around.
27
+ c_language = Parslet.str('ccc')
28
+
29
+ class Language < Parslet::Parser
30
+ def initialize(c_language)
31
+ @c_language = c_language
32
+ super()
33
+ end
34
+
35
+ root :root
36
+
37
+ include ALanguage
38
+
39
+ rule(:root) { str('a(') >> a_language >> str(')') >> space |
40
+ str('b(') >> BLanguage.new >> str(')') >> space |
41
+ str('c(') >> @c_language >> str(')') >> space }
42
+ rule(:space) { str(' ').maybe }
43
+ end
44
+
45
+ Language.new(c_language).parse('a(aaa)')
46
+ Language.new(c_language).parse('b(bbb)')
47
+ Language.new(c_language).parse('c(ccc)')
@@ -0,0 +1,132 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'parslet'
4
+ require 'parslet/convenience'
5
+
6
+ # This example demonstrates tree error reporting in a real life example.
7
+ # The parser code has been contributed by John Mettraux.
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+
20
+ # commons
21
+
22
+ rule(:space) { match('[ \t]').repeat(1) }
23
+ rule(:space?) { space.maybe }
24
+
25
+ rule(:newline) { match('[\r\n]') }
26
+
27
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
28
+
29
+ rule(:line_separator) {
30
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
31
+ }
32
+
33
+ rule(:blank) { line_separator | space }
34
+ rule(:blank?) { blank.maybe }
35
+
36
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
37
+
38
+ # res_statement
39
+
40
+ rule(:reference) {
41
+ (str('@').repeat(1,2) >> identifier).as(:reference)
42
+ }
43
+
44
+ rule(:res_action_or_link) {
45
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
46
+ }
47
+
48
+ rule(:res_actions) {
49
+ (
50
+ reference
51
+ ).as(:resources) >>
52
+ (
53
+ res_action_or_link.as(:res_action)
54
+ ).repeat(0).as(:res_actions)
55
+ }
56
+
57
+ rule(:res_statement) {
58
+ res_actions >>
59
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
60
+ }
61
+
62
+ # expression
63
+
64
+ rule(:expression) {
65
+ res_statement
66
+ }
67
+
68
+ # body
69
+
70
+ rule(:body) {
71
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
72
+ line_separator
73
+ }
74
+
75
+ # blocks
76
+
77
+ rule(:begin_block) {
78
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
79
+ str('begin').as(:begin) >>
80
+ body >>
81
+ str('end')
82
+ }
83
+
84
+ rule(:define_block) {
85
+ str('define').as(:define) >> space >>
86
+ identifier.as(:name) >> str('()') >>
87
+ body >>
88
+ str('end')
89
+ }
90
+
91
+ rule(:block) {
92
+ define_block | begin_block
93
+ }
94
+
95
+ # root
96
+
97
+ rule(:radix) {
98
+ line_separator.maybe >> block >> line_separator.maybe
99
+ }
100
+
101
+ root(:radix)
102
+ end
103
+
104
+
105
+ ds = [
106
+ %{
107
+ define f()
108
+ @res.name
109
+ end
110
+ },
111
+ %{
112
+ define f()
113
+ begin
114
+ @res.name
115
+ end
116
+ end
117
+ }
118
+ ]
119
+
120
+ ds.each do |d|
121
+
122
+ puts '-' * 80
123
+ prettify(d)
124
+
125
+ parser = Parser.new
126
+
127
+ begin
128
+ parser.parse_with_debug(d)
129
+ end
130
+ end
131
+
132
+ puts '-' * 80