ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,35 @@
1
+ # A small example of how to parse common types of comments. The example
2
+ # started out with parser code from Stephen Waits.
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+
10
+ class ALanguage < Parslet::Parser
11
+ root(:lines)
12
+
13
+ rule(:lines) { line.repeat }
14
+ rule(:line) { spaces >> expression.repeat >> newline }
15
+ rule(:newline) { str("\n") >> str("\r").maybe }
16
+
17
+ rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) }
18
+
19
+ rule(:spaces) { space.repeat }
20
+ rule(:space) { multiline_comment | line_comment | str(' ') }
21
+
22
+ rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) }
23
+ rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) }
24
+ end
25
+
26
+ code = %q(
27
+ a
28
+ // line comment
29
+ a a a // line comment
30
+ a /* inline comment */ a
31
+ /* multiline
32
+ comment */
33
+ )
34
+
35
+ pp ALanguage.new.parse_with_debug(code)
@@ -0,0 +1,131 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ # This example demonstrates how to do deepest error reporting, as invented
4
+ # by John Mettraux (issue #64).
5
+
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+ # commons
20
+
21
+ rule(:space) { match('[ \t]').repeat(1) }
22
+ rule(:space?) { space.maybe }
23
+
24
+ rule(:newline) { match('[\r\n]') }
25
+
26
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
27
+
28
+ rule(:line_separator) {
29
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
30
+ }
31
+
32
+ rule(:blank) { line_separator | space }
33
+ rule(:blank?) { blank.maybe }
34
+
35
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
36
+
37
+ # res_statement
38
+
39
+ rule(:reference) {
40
+ (str('@').repeat(1,2) >> identifier).as(:reference)
41
+ }
42
+
43
+ rule(:res_action_or_link) {
44
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
45
+ }
46
+
47
+ rule(:res_actions) {
48
+ (
49
+ reference
50
+ ).as(:resources) >>
51
+ (
52
+ res_action_or_link.as(:res_action)
53
+ ).repeat(0).as(:res_actions)
54
+ }
55
+
56
+ rule(:res_statement) {
57
+ res_actions >>
58
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
59
+ }
60
+
61
+ # expression
62
+
63
+ rule(:expression) {
64
+ res_statement
65
+ }
66
+
67
+ # body
68
+
69
+ rule(:body) {
70
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
71
+ line_separator
72
+ }
73
+
74
+ # blocks
75
+
76
+ rule(:begin_block) {
77
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
78
+ str('begin').as(:begin) >>
79
+ body >>
80
+ str('end')
81
+ }
82
+
83
+ rule(:define_block) {
84
+ str('define').as(:define) >> space >>
85
+ identifier.as(:name) >> str('()') >>
86
+ body >>
87
+ str('end')
88
+ }
89
+
90
+ rule(:block) {
91
+ define_block | begin_block
92
+ }
93
+
94
+ # root
95
+
96
+ rule(:radix) {
97
+ line_separator.maybe >> block >> line_separator.maybe
98
+ }
99
+
100
+ root(:radix)
101
+ end
102
+
103
+ ds = [
104
+ %{
105
+ define f()
106
+ @res.name
107
+ end
108
+ },
109
+ %{
110
+ define f()
111
+ begin
112
+ @res.name
113
+ end
114
+ end
115
+ }
116
+ ]
117
+
118
+ ds.each do |d|
119
+
120
+ puts '-' * 80
121
+ prettify(d)
122
+
123
+ parser = Parser.new
124
+
125
+ begin
126
+ parser.parse_with_debug(d,
127
+ :reporter => Parslet::ErrorReporter::Deepest.new)
128
+ end
129
+ end
130
+
131
+ puts '-' * 80
@@ -0,0 +1,18 @@
1
+ # A small example that shows a really small parser and what happens on parser
2
+ # errors.
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ class MyParser < Parslet::Parser
10
+ rule(:a) { str('a').repeat }
11
+
12
+ def parse(str)
13
+ a.parse(str)
14
+ end
15
+ end
16
+
17
+ pp MyParser.new.parse('aaaa')
18
+ pp MyParser.new.parse('bbbb')
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Example contributed by Hal Brodigan (postmodern). Thanks!
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ class EmailParser < Parslet::Parser
10
+ rule(:space) { match('\s').repeat(1) }
11
+ rule(:space?) { space.maybe }
12
+ rule(:dash?) { match['_-'].maybe }
13
+
14
+ rule(:at) {
15
+ str('@') |
16
+ (dash? >> (str('at') | str('AT')) >> dash?)
17
+ }
18
+ rule(:dot) {
19
+ str('.') |
20
+ (dash? >> (str('dot') | str('DOT')) >> dash?)
21
+ }
22
+
23
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
24
+ rule(:separator) { dot.as(:dot) >> space? | space }
25
+ rule(:words) { word >> (separator >> word).repeat }
26
+
27
+ rule(:email) {
28
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email)
29
+ }
30
+
31
+ root(:email)
32
+ end
33
+
34
+ class EmailSanitizer < Parslet::Transform
35
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
36
+ rule(:word => simple(:word)) { word }
37
+
38
+ rule(:username => sequence(:username)) { username.join + "@" }
39
+ rule(:username => simple(:username)) { username.to_s + "@" }
40
+
41
+ rule(:email => sequence(:email)) { email.join }
42
+ end
43
+
44
+ parser = EmailParser.new
45
+ sanitizer = EmailSanitizer.new
46
+
47
+ unless ARGV[0]
48
+ STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
49
+ STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
50
+ end
51
+
52
+ p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com'))
@@ -0,0 +1,13 @@
1
+ # Basically just demonstrates that you can leave rules empty and get a nice
2
+ # NotImplementedError. A way to quickly spec out your parser rules?
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'parslet'
7
+
8
+ class Parser < Parslet::Parser
9
+ rule(:empty) { }
10
+ end
11
+
12
+
13
+ Parser.new.empty.parslet
@@ -0,0 +1,47 @@
1
+ # Example that demonstrates how a simple erb-like parser could be constructed.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
5
+ require 'parslet'
6
+
7
+ class ErbParser < Parslet::Parser
8
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) }
9
+
10
+ rule(:expression) { (str('=') >> ruby).as(:expression) }
11
+ rule(:comment) { (str('#') >> ruby).as(:comment) }
12
+ rule(:code) { ruby.as(:code) }
13
+ rule(:erb) { expression | comment | code }
14
+
15
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
16
+ rule(:text) { (str('<%').absent? >> any).repeat(1) }
17
+
18
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) }
19
+ root(:text_with_ruby)
20
+ end
21
+
22
+ parser = ErbParser.new
23
+ p parser.parse "The value of x is <%= x %>."
24
+ p parser.parse "<% 1 + 2 %>"
25
+ p parser.parse "<%# commented %>"
26
+
27
+
28
+ evaluator = Parslet::Transform.new do
29
+
30
+ erb_binding = binding
31
+
32
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' }
33
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) }
34
+ rule(:comment => { :ruby => simple(:ruby) }) { '' }
35
+
36
+ rule(:text => simple(:text)) { text }
37
+ rule(:text => sequence(:texts)) { texts.join }
38
+
39
+ end
40
+
41
+ puts evaluator.apply(parser.parse(<<-ERB
42
+ The <% a = 2 %>not printed result of "a = 2".
43
+ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
44
+ The <%= 'nicely' %> printed result.
45
+ The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
46
+ ERB
47
+ ))
@@ -0,0 +1,33 @@
1
+ # A small example of how to make parslet ignore parts of the parse tree.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+ require 'parslet'
5
+
6
+ class IgnoreParslet < Parslet::Atoms::Base
7
+ def initialize(parslet)
8
+ @parslet = parslet
9
+ end
10
+ def to_s_inner(prec)
11
+ @parslet.to_s(prec)
12
+ end
13
+ def try(source, context)
14
+ success, value = result = @parslet.try(source, context)
15
+
16
+ return succ(nil) if success
17
+ return result
18
+ end
19
+
20
+ end
21
+ module IgnoreDSL
22
+ def ignore
23
+ IgnoreParslet.new(self)
24
+ end
25
+ end
26
+
27
+ class Parslet::Atoms::Base
28
+ include IgnoreDSL
29
+ end
30
+
31
+ include Parslet
32
+ p (str('a') >> str('b').ignore >> str('c')).
33
+ parse('abc')
@@ -0,0 +1,125 @@
1
+ # This example is heavily inspired by citrus' ip.citrus. Have a look at both
2
+ # implementations to compare the two approaches!
3
+
4
+ # The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
5
+ # Uniform Resource Identifier (URI): Generic Syntax.
6
+ #
7
+ # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
+
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
+
11
+ require 'pp'
12
+ require 'parslet'
13
+
14
+ module IPv4
15
+ include Parslet
16
+
17
+ # A host identified by an IPv4 literal address is represented in
18
+ # dotted-decimal notation (a sequence of four decimal numbers in the range 0
19
+ # to 255, separated by "."), as described in [RFC1123] by reference to
20
+ # [RFC0952]. Note that other forms of dotted notation may be interpreted on
21
+ # some platforms, as described in Section 7.4, but only the dotted-decimal
22
+ # form of four octets is allowed by this grammar.
23
+ rule(:ipv4) {
24
+ (dec_octet >> str('.') >> dec_octet >> str('.') >>
25
+ dec_octet >> str('.') >> dec_octet).as(:ipv4)
26
+ }
27
+
28
+ rule(:dec_octet) {
29
+ str('25') >> match("[0-5]") |
30
+ str('2') >> match("[0-4]") >> digit |
31
+ str('1') >> digit >> digit |
32
+ match('[1-9]') >> digit |
33
+ digit
34
+ }
35
+
36
+ rule(:digit) {
37
+ match('[0-9]')
38
+ }
39
+ end
40
+
41
+ # Must be used in concert with IPv4: the ls32 and hexdigit rules below rely on its ipv4 and digit rules.
42
+ module IPv6
43
+ include Parslet
44
+
45
+ rule(:colon) { str(':') }
46
+ rule(:dcolon) { colon >> colon }
47
+
48
+ # h16 :
49
+ def h16r(times)
50
+ (h16 >> colon).repeat(times, times)
51
+ end
52
+
53
+ # : h16
54
+ def h16l(times)
55
+ (colon >> h16).repeat(0,times)
56
+ end
57
+
58
+ # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
59
+ # represented numerically in case-insensitive hexadecimal, using one to four
60
+ # hexadecimal digits (leading zeroes are permitted). The eight encoded
61
+ # pieces are given most-significant first, separated by colon characters.
62
+ # Optionally, the least-significant two pieces may instead be represented in
63
+ # IPv4 address textual format. A sequence of one or more consecutive
64
+ # zero-valued 16-bit pieces within the address may be elided, omitting all
65
+ # their digits and leaving exactly two consecutive colons in their place to
66
+ # mark the elision.
67
+ rule(:ipv6) {
68
+ (
69
+ (
70
+ h16r(6) |
71
+ dcolon >> h16r(5) |
72
+ h16.maybe >> dcolon >> h16r(4) |
73
+ (h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
74
+ (h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
75
+ (h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
76
+ (h16 >> h16l(4)).maybe >> dcolon
77
+ ) >> ls32 |
78
+ (h16 >> h16l(5)).maybe >> dcolon >> h16 |
79
+ (h16 >> h16l(6)).maybe >> dcolon
80
+ ).as(:ipv6)
81
+ }
82
+
83
+ rule(:h16) {
84
+ hexdigit.repeat(1,4)
85
+ }
86
+
87
+ rule(:ls32) {
88
+ (h16 >> colon >> h16) |
89
+ ipv4
90
+ }
91
+
92
+ rule(:hexdigit) {
93
+ digit | match("[a-fA-F]")
94
+ }
95
+ end
96
+
97
+ class Parser
98
+ include IPv4
99
+ include IPv6
100
+
101
+ def parse(str)
102
+ (ipv4 | ipv6).parse(str)
103
+ end
104
+ end
105
+
106
+ %W(
107
+ 0.0.0.0
108
+ 255.255.255.255
109
+ 255.255.255
110
+ 1:2:3:4:5:6:7:8
111
+ 12AD:34FC:A453:1922::
112
+ 12AD::34FC
113
+ 12AD::
114
+ ::
115
+ 1:2
116
+ ).each do |address|
117
+ parser = Parser.new
118
+ printf "%30s -> ", address
119
+ begin
120
+ result = parser.parse(address)
121
+ puts result.inspect
122
+ rescue Parslet::ParseFailed => m
123
+ puts "Failed: #{m}"
124
+ end
125
+ end