parslet 1.3.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/HISTORY.txt +38 -1
  2. data/README +33 -21
  3. data/example/deepest_errors.rb +131 -0
  4. data/example/email_parser.rb +2 -6
  5. data/example/ignore.rb +2 -2
  6. data/example/json.rb +0 -3
  7. data/example/modularity.rb +47 -0
  8. data/example/nested_errors.rb +132 -0
  9. data/example/output/deepest_errors.out +54 -0
  10. data/example/output/modularity.out +0 -0
  11. data/example/output/nested_errors.out +54 -0
  12. data/lib/parslet.rb +65 -51
  13. data/lib/parslet/atoms.rb +1 -1
  14. data/lib/parslet/atoms/alternative.rb +11 -12
  15. data/lib/parslet/atoms/base.rb +57 -99
  16. data/lib/parslet/atoms/can_flatten.rb +9 -4
  17. data/lib/parslet/atoms/context.rb +26 -4
  18. data/lib/parslet/atoms/entity.rb +5 -10
  19. data/lib/parslet/atoms/lookahead.rb +11 -7
  20. data/lib/parslet/atoms/named.rb +8 -12
  21. data/lib/parslet/atoms/re.rb +10 -9
  22. data/lib/parslet/atoms/repetition.rb +23 -24
  23. data/lib/parslet/atoms/sequence.rb +10 -16
  24. data/lib/parslet/atoms/str.rb +11 -13
  25. data/lib/parslet/cause.rb +45 -13
  26. data/lib/parslet/convenience.rb +6 -6
  27. data/lib/parslet/error_reporter.rb +7 -0
  28. data/lib/parslet/error_reporter/deepest.rb +95 -0
  29. data/lib/parslet/error_reporter/tree.rb +57 -0
  30. data/lib/parslet/export.rb +4 -4
  31. data/lib/parslet/expression.rb +0 -2
  32. data/lib/parslet/expression/treetop.rb +2 -2
  33. data/lib/parslet/parser.rb +2 -6
  34. data/lib/parslet/pattern.rb +15 -4
  35. data/lib/parslet/pattern/binding.rb +3 -3
  36. data/lib/parslet/rig/rspec.rb +2 -2
  37. data/lib/parslet/slice.rb +0 -6
  38. data/lib/parslet/source.rb +40 -59
  39. data/lib/parslet/source/line_cache.rb +2 -2
  40. data/lib/parslet/transform.rb +13 -7
  41. data/lib/parslet/transform/context.rb +1 -1
  42. metadata +69 -26
  43. data/example/ignore_whitespace.rb +0 -66
  44. data/lib/parslet/bytecode.rb +0 -6
  45. data/lib/parslet/bytecode/compiler.rb +0 -138
  46. data/lib/parslet/bytecode/instructions.rb +0 -358
  47. data/lib/parslet/bytecode/vm.rb +0 -209
  48. data/lib/parslet/error_tree.rb +0 -50
data/HISTORY.txt CHANGED
@@ -3,8 +3,45 @@
3
3
  - prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for
4
4
  the win.
5
5
 
6
- = 1.3.1 / ???
6
+ = 1.4.0 / 25May2012
7
7
 
8
+ + Revised documentation. A few new API features have finally made it into
9
+ the documentation. Examples in the documentation are now curated and
10
+ run against the current code so that they really really work.
11
+ Also, the website generation tools have been replaced with 2012-style
12
+ tools. Much less pain to update now.
13
+
14
+ + Parslet::Source no longer holds a StringIO; it directly holds the
15
+ buffer to be parsed. The api of Source has changed a tiny bit. This change
16
+ has been made for speed optimisation reasons.
17
+
18
+ + :reporter argument to parse, allowing you to customize error reporting within
19
+ wide boundaries. See issue #64 for a discussion.
20
+ Included are two error reporters, one (default) with the existing error
21
+ tree functionality, one reporting deepest errors as defined by the above
22
+ ticket.
23
+
24
+ + Optimistic parse: Parsing is two phase, with the first phase assuming
25
+ there will be no errors. This yields ~ 20% speed improvement in the
26
+ case where the parse succeeds.
27
+ Also, internal error handling is now using tuples. This and other
28
+ optimizations have yielded ~ 30% overall improvement.
29
+
30
+ ! #error_tree and #cause removed from all of parslet. The
31
+ Parslet::ParseFailed exception now contains a #cause field that can
32
+ be asked for an #ascii_tree as before.
33
+ Cleaner internal error handling, not stateful in atoms anymore. Some
34
+ parsers will see correct error reporting for the first time. (issue #65)
35
+
36
+ + Made it possible to pass a custom Parslet::Source implementor to #parse.
37
+ (see #63)
38
+
39
+ + #parse now has a second argument that is an options hash. See
40
+ Parslet::Atoms::Base#parse for documentation.
41
+
42
+ - VM engine on the way out. No benefit except for the intellectual
43
+ challenge.
44
+
8
45
  = 1.3.0 / 5Mar2012
9
46
 
10
47
  ! Parslet::Transform::Context is now much more well-behaved. It has
data/README CHANGED
@@ -18,26 +18,38 @@ SYNOPSIS
18
18
  require 'parslet'
19
19
  include Parslet
20
20
 
21
- # Constructs a parser using a Parser Expression Grammar like DSL:
22
- parser = str('"') >>
23
- (
24
- str('\\') >> any |
25
- str('"').absnt? >> any
26
- ).repeat.as(:string) >>
27
- str('"')
28
-
29
- # Parse the string and capture parts of the interpretation (:string above)
30
- tree = parser.parse('"This is a \\"String\\" in which you can escape stuff"')
31
-
32
- tree # => {:string=>"This is a \\\"String\\\" in which you can escape stuff"}
33
-
34
- # Here's how you can grab results from that tree:
35
-
36
- transform = Parslet::Transform.new do
37
- rule(:string => simple(:x)) {
38
- puts "String contents: #{x}" }
21
+ # parslet parses strings
22
+ str('foo').
23
+ parse('foo') # => "foo"@0
24
+
25
+ # it matches character sets
26
+ match['abc'].parse('a') # => "a"@0
27
+ match['abc'].parse('b') # => "b"@0
28
+ match['abc'].parse('c') # => "c"@0
29
+
30
+ # and it annotates its output
31
+ str('foo').as(:important_bit).
32
+ parse('foo') # => {:important_bit=>"foo"@0}
33
+
34
+ # you can construct parsers with just a few lines
35
+ quote = str('"')
36
+ simple_string = quote >> (quote.absent? >> any).repeat >> quote
37
+
38
+ simple_string.
39
+ parse('"Simple Simple Simple"') # => "\"Simple Simple Simple\""@0
40
+
41
+ # or by making a fuss about it
42
+ class Smalltalk < Parslet::Parser
43
+ root :smalltalk
44
+
45
+ rule(:smalltalk) { statements }
46
+ rule(:statements) {
47
+ # insert smalltalk parser here (outside of the scope of this readme)
48
+ }
39
49
  end
40
- transform.apply(tree)
50
+
51
+ # and then
52
+ Smalltalk.new.parse('smalltalk')
41
53
 
42
54
  COMPATIBILITY
43
55
 
@@ -53,6 +65,6 @@ ruby-1.8.7-p334 for better results.
53
65
 
54
66
  STATUS
55
67
 
56
- At version 1.3.0 - See HISTORY.txt for changes.
68
+ At version 1.4.0 - See HISTORY.txt for changes.
57
69
 
58
- (c) 2010 Kaspar Schiess
70
+ (c) 2010, 2011, 2012 Kaspar Schiess
data/example/deepest_errors.rb ADDED
@@ -0,0 +1,131 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ # This example demonstrates how to do deepest error reporting, as invented
4
+ # by John Mettraux (issue #64).
5
+
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+ # commons
20
+
21
+ rule(:space) { match('[ \t]').repeat(1) }
22
+ rule(:space?) { space.maybe }
23
+
24
+ rule(:newline) { match('[\r\n]') }
25
+
26
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
27
+
28
+ rule(:line_separator) {
29
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
30
+ }
31
+
32
+ rule(:blank) { line_separator | space }
33
+ rule(:blank?) { blank.maybe }
34
+
35
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
36
+
37
+ # res_statement
38
+
39
+ rule(:reference) {
40
+ (str('@').repeat(1,2) >> identifier).as(:reference)
41
+ }
42
+
43
+ rule(:res_action_or_link) {
44
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
45
+ }
46
+
47
+ rule(:res_actions) {
48
+ (
49
+ reference
50
+ ).as(:resources) >>
51
+ (
52
+ res_action_or_link.as(:res_action)
53
+ ).repeat(0).as(:res_actions)
54
+ }
55
+
56
+ rule(:res_statement) {
57
+ res_actions >>
58
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
59
+ }
60
+
61
+ # expression
62
+
63
+ rule(:expression) {
64
+ res_statement
65
+ }
66
+
67
+ # body
68
+
69
+ rule(:body) {
70
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
71
+ line_separator
72
+ }
73
+
74
+ # blocks
75
+
76
+ rule(:begin_block) {
77
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
78
+ str('begin').as(:begin) >>
79
+ body >>
80
+ str('end')
81
+ }
82
+
83
+ rule(:define_block) {
84
+ str('define').as(:define) >> space >>
85
+ identifier.as(:name) >> str('()') >>
86
+ body >>
87
+ str('end')
88
+ }
89
+
90
+ rule(:block) {
91
+ define_block | begin_block
92
+ }
93
+
94
+ # root
95
+
96
+ rule(:radix) {
97
+ line_separator.maybe >> block >> line_separator.maybe
98
+ }
99
+
100
+ root(:radix)
101
+ end
102
+
103
+ ds = [
104
+ %{
105
+ define f()
106
+ @res.name
107
+ end
108
+ },
109
+ %{
110
+ define f()
111
+ begin
112
+ @res.name
113
+ end
114
+ end
115
+ }
116
+ ]
117
+
118
+ ds.each do |d|
119
+
120
+ puts '-' * 80
121
+ prettify(d)
122
+
123
+ parser = Parser.new
124
+
125
+ begin
126
+ parser.parse_with_debug(d,
127
+ :reporter => Parslet::ErrorReporter::Deepest.new)
128
+ end
129
+ end
130
+
131
+ puts '-' * 80
data/example/email_parser.rb CHANGED
@@ -4,6 +4,7 @@
4
4
 
5
5
  $:.unshift File.dirname(__FILE__) + "/../lib"
6
6
  require 'parslet'
7
+ require 'parslet/convenience'
7
8
 
8
9
  class EmailParser < Parslet::Parser
9
10
  rule(:space) { match('\s').repeat(1) }
@@ -48,9 +49,4 @@ unless ARGV[0]
48
49
  STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
49
50
  end
50
51
 
51
- begin
52
- p sanitizer.apply(parser.parse(ARGV[0] || 'a.b.c.d@gmail.com'))
53
- rescue Parslet::ParseFailed => error
54
- puts error
55
- puts parser.error_tree
56
- end
52
+ p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com'))
data/example/ignore.rb CHANGED
@@ -11,9 +11,9 @@ class IgnoreParslet < Parslet::Atoms::Base
11
11
  @parslet.to_s(prec)
12
12
  end
13
13
  def try(source, context)
14
- result = @parslet.try(source, context)
14
+ success, value = result = @parslet.try(source, context)
15
15
 
16
- return success(nil) unless result.error?
16
+ return succ(nil) if success
17
17
  return result
18
18
  end
19
19
 
data/example/json.rb CHANGED
@@ -107,9 +107,6 @@ module MyJson
107
107
  out = transformer.apply(tree)
108
108
 
109
109
  out
110
-
111
- rescue Parslet::ParseFailed => e
112
- puts e, parser.root.error_tree
113
110
  end
114
111
  end
115
112
 
data/example/modularity.rb ADDED
@@ -0,0 +1,47 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'pp'
4
+ require "parslet"
5
+
6
+ # Demonstrates modular parsers, split out over many classes. Please look at
7
+ # ip_address.rb as well.
8
+
9
+ module ALanguage
10
+ include Parslet
11
+
12
+ # Parslet rules are really a special kind of method. Mix them into your
13
+ # classes!
14
+ rule(:a_language) { str('aaa') }
15
+ end
16
+
17
+ # Parslet parsers are parslet atoms as well. Create an instance and chain them
18
+ # to your other rules.
19
+ #
20
+ class BLanguage < Parslet::Parser
21
+ root :blang
22
+
23
+ rule(:blang) { str('bbb') }
24
+ end
25
+
26
+ # Parslet atoms are really Ruby values, pass them around.
27
+ c_language = Parslet.str('ccc')
28
+
29
+ class Language < Parslet::Parser
30
+ def initialize(c_language)
31
+ @c_language = c_language
32
+ super()
33
+ end
34
+
35
+ root :root
36
+
37
+ include ALanguage
38
+
39
+ rule(:root) { str('a(') >> a_language >> str(')') >> space |
40
+ str('b(') >> BLanguage.new >> str(')') >> space |
41
+ str('c(') >> @c_language >> str(')') >> space }
42
+ rule(:space) { str(' ').maybe }
43
+ end
44
+
45
+ Language.new(c_language).parse('a(aaa)')
46
+ Language.new(c_language).parse('b(bbb)')
47
+ Language.new(c_language).parse('c(ccc)')
data/example/nested_errors.rb ADDED
@@ -0,0 +1,132 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'parslet'
4
+ require 'parslet/convenience'
5
+
6
+ # This example demonstrates tree error reporting in a real life example.
7
+ # The parser code has been contributed by John Mettraux.
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+
20
+ # commons
21
+
22
+ rule(:space) { match('[ \t]').repeat(1) }
23
+ rule(:space?) { space.maybe }
24
+
25
+ rule(:newline) { match('[\r\n]') }
26
+
27
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
28
+
29
+ rule(:line_separator) {
30
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
31
+ }
32
+
33
+ rule(:blank) { line_separator | space }
34
+ rule(:blank?) { blank.maybe }
35
+
36
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
37
+
38
+ # res_statement
39
+
40
+ rule(:reference) {
41
+ (str('@').repeat(1,2) >> identifier).as(:reference)
42
+ }
43
+
44
+ rule(:res_action_or_link) {
45
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
46
+ }
47
+
48
+ rule(:res_actions) {
49
+ (
50
+ reference
51
+ ).as(:resources) >>
52
+ (
53
+ res_action_or_link.as(:res_action)
54
+ ).repeat(0).as(:res_actions)
55
+ }
56
+
57
+ rule(:res_statement) {
58
+ res_actions >>
59
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
60
+ }
61
+
62
+ # expression
63
+
64
+ rule(:expression) {
65
+ res_statement
66
+ }
67
+
68
+ # body
69
+
70
+ rule(:body) {
71
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
72
+ line_separator
73
+ }
74
+
75
+ # blocks
76
+
77
+ rule(:begin_block) {
78
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
79
+ str('begin').as(:begin) >>
80
+ body >>
81
+ str('end')
82
+ }
83
+
84
+ rule(:define_block) {
85
+ str('define').as(:define) >> space >>
86
+ identifier.as(:name) >> str('()') >>
87
+ body >>
88
+ str('end')
89
+ }
90
+
91
+ rule(:block) {
92
+ define_block | begin_block
93
+ }
94
+
95
+ # root
96
+
97
+ rule(:radix) {
98
+ line_separator.maybe >> block >> line_separator.maybe
99
+ }
100
+
101
+ root(:radix)
102
+ end
103
+
104
+
105
+ ds = [
106
+ %{
107
+ define f()
108
+ @res.name
109
+ end
110
+ },
111
+ %{
112
+ define f()
113
+ begin
114
+ @res.name
115
+ end
116
+ end
117
+ }
118
+ ]
119
+
120
+ ds.each do |d|
121
+
122
+ puts '-' * 80
123
+ prettify(d)
124
+
125
+ parser = Parser.new
126
+
127
+ begin
128
+ parser.parse_with_debug(d)
129
+ end
130
+ end
131
+
132
+ puts '-' * 80