ghazel-parslet 1.4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,35 @@
1
+ # A small example on how to parse common types of comments. The example
2
+ # started out with parser code from Stephen Waits.
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+
10
+ class ALanguage < Parslet::Parser # toy grammar: lines of 'a' tokens interleaved with // and /* */ comments
11
+ root(:lines) # parsing starts at the :lines rule
12
+
13
+ rule(:lines) { line.repeat }
14
+ rule(:line) { spaces >> expression.repeat >> newline } # leading blanks/comments, any number of expressions, then a line end
15
+ rule(:newline) { str("\n") >> str("\r").maybe } # NOTE(review): accepts "\n" optionally followed by "\r" (LF-CR order, not CR-LF) -- confirm this is intended
16
+
17
+ rule(:expression) { (str('a').as(:a) >> spaces).as(:exp) } # a literal 'a' plus trailing blanks/comments, tagged :exp
18
+
19
+ rule(:spaces) { space.repeat }
20
+ rule(:space) { multiline_comment | line_comment | str(' ') } # comments are accepted anywhere a single blank is
21
+
22
+ rule(:line_comment) { (str('//') >> (newline.absent? >> any).repeat).as(:line) } # '//' and every character up to, but not including, the next newline
23
+ rule(:multiline_comment) { (str('/*') >> (str('*/').absent? >> any).repeat >> str('*/')).as(:multi) } # '/*', then everything up to and including the first '*/'
24
+ end
25
+
26
+ code = %q(
27
+ a
28
+ // line comment
29
+ a a a // line comment
30
+ a /* inline comment */ a
31
+ /* multiline
32
+ comment */
33
+ )
34
+
35
+ pp ALanguage.new.parse_with_debug(code)
@@ -0,0 +1,131 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ # This example demonstrates how to do deepest error reporting, as invented
4
+ # by John Mettraux (issue #64).
5
+
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ def prettify(str) # print +str+ under a column ruler, one numbered line per input line
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20" # ruler: 3 leading blanks match the "%02d " prefix below; marks sit at text columns 5, 10, 15 and 20
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1, # each_with_index counts from 0; display line numbers from 1
14
+ line.chomp # strip the line terminator; the format string supplies its own "\n"
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser # grammar for a small define/begin ... end block language, used to demo error reporting
19
+ # commons
20
+
21
+ rule(:space) { match('[ \t]').repeat(1) } # one or more spaces or tabs
22
+ rule(:space?) { space.maybe }
23
+
24
+ rule(:newline) { match('[\r\n]') } # a single CR or LF character
25
+
26
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat } # '#' through the end of the line; the line terminator is not consumed
27
+
28
+ rule(:line_separator) {
29
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1) # one or more of: optionally commented line end, or ';', with optional blanks around each
30
+ }
31
+
32
+ rule(:blank) { line_separator | space }
33
+ rule(:blank?) { blank.maybe }
34
+
35
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) } # note: digits are allowed in first position too
36
+
37
+ # res_statement
38
+
39
+ rule(:reference) {
40
+ (str('@').repeat(1,2) >> identifier).as(:reference) # one or two '@' followed by an identifier
41
+ }
42
+
43
+ rule(:res_action_or_link) {
44
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()') # '.name()' or '.name?()'; the literal '()' is required, and the sample inputs below write '@res.name' without it -- presumably the failure this example reports
45
+ }
46
+
47
+ rule(:res_actions) {
48
+ (
49
+ reference
50
+ ).as(:resources) >>
51
+ (
52
+ res_action_or_link.as(:res_action)
53
+ ).repeat(0).as(:res_actions) # zero or more chained actions after the reference
54
+ }
55
+
56
+ rule(:res_statement) {
57
+ res_actions >>
58
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field) # optional ':field' suffix
59
+ }
60
+
61
+ # expression
62
+
63
+ rule(:expression) {
64
+ res_statement # currently the only kind of expression
65
+ }
66
+
67
+ # body
68
+
69
+ rule(:body) {
70
+ (line_separator >> (block | expression)).repeat(1).as(:body) >> # one or more entries, each introduced by a separator; nested blocks are tried before expressions
71
+ line_separator # a body must also end with a separator before the closing keyword
72
+ }
73
+
74
+ # blocks
75
+
76
+ rule(:begin_block) {
77
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >> # optional 'concurrent ' prefix
78
+ str('begin').as(:begin) >>
79
+ body >>
80
+ str('end')
81
+ }
82
+
83
+ rule(:define_block) {
84
+ str('define').as(:define) >> space >>
85
+ identifier.as(:name) >> str('()') >> # only an empty parameter list '()' is accepted
86
+ body >>
87
+ str('end')
88
+ }
89
+
90
+ rule(:block) {
91
+ define_block | begin_block
92
+ }
93
+
94
+ # root
95
+
96
+ rule(:radix) {
97
+ line_separator.maybe >> block >> line_separator.maybe # exactly one top-level block, optionally surrounded by separators
98
+ }
99
+
100
+ root(:radix)
101
+ end
102
+
103
+ ds = [
104
+ %{
105
+ define f()
106
+ @res.name
107
+ end
108
+ },
109
+ %{
110
+ define f()
111
+ begin
112
+ @res.name
113
+ end
114
+ end
115
+ }
116
+ ]
117
+
118
+ ds.each do |d| # for each sample: print a rule, the numbered source, then attempt to parse it
119
+
120
+ puts '-' * 80
121
+ prettify(d)
122
+
123
+ parser = Parser.new
124
+
125
+ begin # NOTE(review): no rescue/ensure follows, so this begin/end only groups the call
126
+ parser.parse_with_debug(d, # return value is discarded; presumably parse_with_debug reports failures itself -- confirm in parslet/convenience
127
+ :reporter => Parslet::ErrorReporter::Deepest.new) # selects the Deepest error reporter for this parse
128
+ end
129
+ end
130
+
131
+ puts '-' * 80
@@ -0,0 +1,18 @@
1
+ # A small example that shows a really small parser and what happens on parser
2
+ # errors.
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+
9
+ class MyParser < Parslet::Parser # minimal parser: a single rule plus its own #parse entry point
10
+ rule(:a) { str('a').repeat } # any number of 'a' characters
11
+
12
+ def parse(str) # no root is declared; parsing is delegated directly to the :a rule
13
+ a.parse(str)
14
+ end
15
+ end
16
+
17
+ pp MyParser.new.parse('aaaa') # input made only of 'a' characters
18
+ pp MyParser.new.parse('bbbb') # input containing no 'a'; presumably the parser error the header comment refers to -- confirm against the expected output
@@ -0,0 +1,52 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Example contributed by Hal Brodigan (postmodern). Thanks!
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+ require 'parslet'
7
+ require 'parslet/convenience'
8
+
9
+ class EmailParser < Parslet::Parser # parses plain and obfuscated addresses such as "a dot b at example dot com"
10
+ rule(:space) { match('\s').repeat(1) }
11
+ rule(:space?) { space.maybe }
12
+ rule(:dash?) { match['_-'].maybe } # optional '_' or '-'; uses the bracket form match[...] rather than match('[...]')
13
+
14
+ rule(:at) {
15
+ str('@') | # a literal '@' ...
16
+ (dash? >> (str('at') | str('AT')) >> dash?) # ... or the word 'at'/'AT', optionally wrapped in '_'/'-' (mixed case such as 'At' is not matched)
17
+ }
18
+ rule(:dot) {
19
+ str('.') | # a literal '.' ...
20
+ (dash? >> (str('dot') | str('DOT')) >> dash?) # ... or the word 'dot'/'DOT', optionally wrapped in '_'/'-'
21
+ }
22
+
23
+ rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? } # lowercase letters and digits only; trailing whitespace is consumed but not captured
24
+ rule(:separator) { dot.as(:dot) >> space? | space } # '>>' binds tighter than '|': (dot, optional space) or plain whitespace
25
+ rule(:words) { word >> (separator >> word).repeat }
26
+
27
+ rule(:email) {
28
+ (words.as(:username) >> space? >> at >> space? >> words).as(:email) # only the part before the 'at' is tagged :username
29
+ }
30
+
31
+ root(:email)
32
+ end
33
+
34
+ class EmailSanitizer < Parslet::Transform # turns the EmailParser tree back into a normalised address string
35
+ rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" } # a word that followed a dot separator: emit '.' regardless of how the dot was written
36
+ rule(:word => simple(:word)) { word } # any other word is emitted as is
37
+
38
+ rule(:username => sequence(:username)) { username.join + "@" } # several words: concatenate them and append '@'
39
+ rule(:username => simple(:username)) { username.to_s + "@" } # a single word: append '@'
40
+
41
+ rule(:email => sequence(:email)) { email.join } # glue the transformed pieces into the final string
42
+ end
43
+
44
+ parser = EmailParser.new
45
+ sanitizer = EmailSanitizer.new
46
+
47
+ unless ARGV[0] # a missing argument is not fatal: print the usage hint, then fall through to the default address
48
+ STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
49
+ STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using a.b.c.d@gmail.com"
50
+ end
51
+
52
+ p sanitizer.apply(parser.parse_with_debug(ARGV[0] || 'a.b.c.d@gmail.com')) # parse the argument (or the default), transform the tree, print the result
@@ -0,0 +1,13 @@
1
+ # Basically just demonstrates that you can leave rules empty and get a nice
2
+ # NotImplementedError. A way to quickly spec out your parser rules?
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'parslet'
7
+
8
+ class Parser < Parslet::Parser # parser whose only rule is a placeholder
9
+ rule(:empty) { } # deliberately empty: the block returns nil, so the rule is declared but not implemented
10
+ end
11
+
12
+
13
+ Parser.new.empty.parslet # evaluates the empty rule; per the header comment this is expected to raise NotImplementedError
@@ -0,0 +1,47 @@
1
+ # Example that demonstrates how a simple erb-like parser could be constructed.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+
5
+ require 'parslet'
6
+
7
+ class ErbParser < Parslet::Parser # splits a template into plain text and <% ... %> tags
8
+ rule(:ruby) { (str('%>').absent? >> any).repeat.as(:ruby) } # everything up to, but not including, the next '%>'
9
+
10
+ rule(:expression) { (str('=') >> ruby).as(:expression) } # body of a <%= ... %> tag
11
+ rule(:comment) { (str('#') >> ruby).as(:comment) } # body of a <%# ... %> tag
12
+ rule(:code) { ruby.as(:code) } # body of a plain <% ... %> tag
13
+ rule(:erb) { expression | comment | code } # the '=' and '#' forms are tried first; :code is the fallback
14
+
15
+ rule(:erb_with_tags) { str('<%') >> erb >> str('%>') }
16
+ rule(:text) { (str('<%').absent? >> any).repeat(1) } # at least one character, stopping before the next '<%'
17
+
18
+ rule(:text_with_ruby) { (text.as(:text) | erb_with_tags).repeat.as(:text) } # note: the whole sequence is tagged :text, the same key used for each plain-text chunk
19
+ root(:text_with_ruby)
20
+ end
21
+
22
+ parser = ErbParser.new
23
+ p parser.parse "The value of x is <%= x %>."
24
+ p parser.parse "<% 1 + 2 %>"
25
+ p parser.parse "<%# commented %>"
26
+
27
+
28
+ evaluator = Parslet::Transform.new do # transform that renders an ErbParser tree to a string by evaluating its tags
29
+
30
+ erb_binding = binding # one binding shared by every eval below, so a variable assigned in one tag is visible in later tags
31
+
32
+ rule(:code => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding); '' } # run for side effects only; contributes nothing to the output. SECURITY: eval executes the template's code -- use with trusted templates only
33
+ rule(:expression => { :ruby => simple(:ruby) }) { eval(ruby, erb_binding) } # the evaluated value becomes part of the output (same eval caveat applies)
34
+ rule(:comment => { :ruby => simple(:ruby) }) { '' } # comment tags are never evaluated
35
+
36
+ rule(:text => simple(:text)) { text } # a single plain-text chunk passes through
37
+ rule(:text => sequence(:texts)) { texts.join } # the top-level sequence is concatenated into the final string
38
+
39
+ end
40
+
41
+ puts evaluator.apply(parser.parse(<<-ERB
42
+ The <% a = 2 %>not printed result of "a = 2".
43
+ The <%# a = 1 %>not printed non-evaluated comment "a = 1", see the value of a below.
44
+ The <%= 'nicely' %> printed result.
45
+ The <% b = 3 %>value of a is <%= a %>, and b is <%= b %>.
46
+ ERB
47
+ ))
@@ -0,0 +1,33 @@
1
+ # A small example on how to make parslet ignore parts of the parse tree.
2
+
3
+ $:.unshift File.dirname(__FILE__) + "/../lib"
4
+ require 'parslet'
5
+
6
+ class IgnoreParslet < Parslet::Atoms::Base # wrapper atom: runs another parslet but discards what it matched
7
+ def initialize(parslet) # parslet: the atom whose match should be dropped from the result
8
+ @parslet = parslet
9
+ end
10
+ def to_s_inner(prec) # rendered exactly like the wrapped parslet
11
+ @parslet.to_s(prec)
12
+ end
13
+ def try(source, context)
14
+ success, value = result = @parslet.try(source, context) # result keeps the full return value; NOTE(review): the destructured +value+ is never used
15
+
16
+ return succ(nil) if success # on success, report nil in place of the wrapped match
17
+ return result # on failure, pass the wrapped parslet's result through unchanged
18
+ end
19
+
20
+ end
21
+ module IgnoreDSL # mixin that adds #ignore to whatever includes it
22
+ def ignore # returns the receiver wrapped in an IgnoreParslet
23
+ IgnoreParslet.new(self)
24
+ end
25
+ end
26
+
27
+ class Parslet::Atoms::Base # reopens the library class so that atoms derived from it respond to #ignore
28
+ include IgnoreDSL
29
+ end
30
+
31
+ include Parslet
32
+ p (str('a') >> str('b').ignore >> str('c')).
33
+ parse('abc')
@@ -0,0 +1,125 @@
1
+ # This example is heavily inspired by citrus' ip.citrus. Have a look at both
2
+ # of these to get some choice!
3
+
4
+ # The grammars in this file conform to the ABNF given in Appendix A of RFC 3986
5
+ # Uniform Resource Identifier (URI): Generic Syntax.
6
+ #
7
+ # See http://tools.ietf.org/html/rfc3986#appendix-A for more information.
8
+
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
+
11
+ require 'pp'
12
+ require 'parslet'
13
+
14
+ module IPv4 # mixin defining the dotted-decimal IPv4 rules
15
+ include Parslet
16
+
17
+ # A host identified by an IPv4 literal address is represented in
18
+ # dotted-decimal notation (a sequence of four decimal numbers in the range 0
19
+ # to 255, separated by "."), as described in [RFC1123] by reference to
20
+ # [RFC0952]. Note that other forms of dotted notation may be interpreted on
21
+ # some platforms, as described in Section 7.4, but only the dotted-decimal
22
+ # form of four octets is allowed by this grammar.
23
+ rule(:ipv4) {
24
+ (dec_octet >> str('.') >> dec_octet >> str('.') >>
25
+ dec_octet >> str('.') >> dec_octet).as(:ipv4) # exactly four octets, captured together as :ipv4
26
+ }
27
+
28
+ rule(:dec_octet) { # alternatives run from the longest forms to the shortest
29
+ str('25') >> match("[0-5]") | # 250-255
30
+ str('2') >> match("[0-4]") >> digit | # 200-249
31
+ str('1') >> digit >> digit | # 100-199
32
+ match('[1-9]') >> digit | # 10-99
33
+ digit # 0-9
34
+ }
35
+
36
+ rule(:digit) {
37
+ match('[0-9]')
38
+ }
39
+ end
40
+
41
+ # Must be used in concert with IPv4
42
+ module IPv6 # mixin defining the IPv6 rules; refers to +digit+ and +ipv4+, which are not defined in this module
43
+ include Parslet
44
+
45
+ rule(:colon) { str(':') }
46
+ rule(:dcolon) { colon >> colon } # the '::' elision marker
47
+
48
+ # Exactly +times+ repetitions of: h16 ":"
49
+ def h16r(times)
50
+ (h16 >> colon).repeat(times, times) # min == max, so the count is fixed
51
+ end
52
+
53
+ # Between zero and +times+ repetitions of: ":" h16
54
+ def h16l(times)
55
+ (colon >> h16).repeat(0,times)
56
+ end
57
+
58
+ # A 128-bit IPv6 address is divided into eight 16-bit pieces. Each piece is
59
+ # represented numerically in case-insensitive hexadecimal, using one to four
60
+ # hexadecimal digits (leading zeroes are permitted). The eight encoded
61
+ # pieces are given most-significant first, separated by colon characters.
62
+ # Optionally, the least-significant two pieces may instead be represented in
63
+ # IPv4 address textual format. A sequence of one or more consecutive
64
+ # zero-valued 16-bit pieces within the address may be elided, omitting all
65
+ # their digits and leaving exactly two consecutive colons in their place to
66
+ # mark the elision.
67
+ rule(:ipv6) {
68
+ (
69
+ (
70
+ h16r(6) | # no elision: six leading groups
71
+ dcolon >> h16r(5) | # '::' first, five groups after it
72
+ h16.maybe >> dcolon >> h16r(4) |
73
+ (h16 >> h16l(1)).maybe >> dcolon >> h16r(3) |
74
+ (h16 >> h16l(2)).maybe >> dcolon >> h16r(2) |
75
+ (h16 >> h16l(3)).maybe >> dcolon >> h16r(1) |
76
+ (h16 >> h16l(4)).maybe >> dcolon
77
+ ) >> ls32 | # every alternative in the group above is followed by the final 32 bits
78
+ (h16 >> h16l(5)).maybe >> dcolon >> h16 | # '::' followed by a single trailing group
79
+ (h16 >> h16l(6)).maybe >> dcolon # '::' at the very end
80
+ ).as(:ipv6)
81
+ }
82
+
83
+ rule(:h16) {
84
+ hexdigit.repeat(1,4) # one to four hex digits
85
+ }
86
+
87
+ rule(:ls32) {
88
+ (h16 >> colon >> h16) | # the last 32 bits as two hex groups ...
89
+ ipv4 # ... or as an embedded IPv4 address
90
+ }
91
+
92
+ rule(:hexdigit) {
93
+ digit | match("[a-fA-F]")
94
+ }
95
+ end
96
+
97
+ class Parser # plain class, not a Parslet::Parser subclass; its rules come from the two mixins
98
+ include IPv4
99
+ include IPv6
100
+
101
+ def parse(str) # try the IPv4 grammar first, then the IPv6 grammar
102
+ (ipv4 | ipv6).parse(str)
103
+ end
104
+ end
105
+
106
+ %W(
107
+ 0.0.0.0
108
+ 255.255.255.255
109
+ 255.255.255
110
+ 1:2:3:4:5:6:7:8
111
+ 12AD:34FC:A453:1922::
112
+ 12AD::34FC
113
+ 12AD::
114
+ ::
115
+ 1:2
116
+ ).each do |address| # run every sample address above through the parser and print one result line each
117
+ parser = Parser.new
118
+ printf "%30s -> ", address # right-align the address in a 30-character column
119
+ begin
120
+ result = parser.parse(address)
121
+ puts result.inspect
122
+ rescue Parslet::ParseFailed => m # a sample that does not parse is reported and the loop continues
123
+ puts "Failed: #{m}"
124
+ end
125
+ end