ghazel-parslet 1.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/HISTORY.txt +195 -0
  2. data/LICENSE +23 -0
  3. data/README +70 -0
  4. data/Rakefile +49 -0
  5. data/example/boolean_algebra.rb +70 -0
  6. data/example/calc.rb +153 -0
  7. data/example/comments.rb +35 -0
  8. data/example/deepest_errors.rb +131 -0
  9. data/example/documentation.rb +18 -0
  10. data/example/email_parser.rb +52 -0
  11. data/example/empty.rb +13 -0
  12. data/example/erb.rb +47 -0
  13. data/example/ignore.rb +33 -0
  14. data/example/ip_address.rb +125 -0
  15. data/example/json.rb +128 -0
  16. data/example/local.rb +34 -0
  17. data/example/mathn.rb +44 -0
  18. data/example/minilisp.rb +94 -0
  19. data/example/modularity.rb +47 -0
  20. data/example/nested_errors.rb +132 -0
  21. data/example/output/boolean_algebra.out +4 -0
  22. data/example/output/calc.out +1 -0
  23. data/example/output/comments.out +8 -0
  24. data/example/output/deepest_errors.out +54 -0
  25. data/example/output/documentation.err +4 -0
  26. data/example/output/documentation.out +1 -0
  27. data/example/output/email_parser.out +2 -0
  28. data/example/output/empty.err +1 -0
  29. data/example/output/erb.out +7 -0
  30. data/example/output/ignore.out +1 -0
  31. data/example/output/ignore_whitespace.out +1 -0
  32. data/example/output/ip_address.out +9 -0
  33. data/example/output/json.out +5 -0
  34. data/example/output/local.out +3 -0
  35. data/example/output/mathn.out +4 -0
  36. data/example/output/minilisp.out +5 -0
  37. data/example/output/modularity.out +0 -0
  38. data/example/output/nested_errors.out +54 -0
  39. data/example/output/parens.out +8 -0
  40. data/example/output/readme.out +1 -0
  41. data/example/output/seasons.out +28 -0
  42. data/example/output/sentence.out +1 -0
  43. data/example/output/simple_xml.out +2 -0
  44. data/example/output/string_parser.out +3 -0
  45. data/example/parens.rb +42 -0
  46. data/example/readme.rb +30 -0
  47. data/example/seasons.rb +46 -0
  48. data/example/sentence.rb +36 -0
  49. data/example/simple.lit +3 -0
  50. data/example/simple_xml.rb +54 -0
  51. data/example/string_parser.rb +77 -0
  52. data/example/test.lit +4 -0
  53. data/lib/parslet.rb +254 -0
  54. data/lib/parslet/atoms.rb +32 -0
  55. data/lib/parslet/atoms/alternative.rb +50 -0
  56. data/lib/parslet/atoms/base.rb +124 -0
  57. data/lib/parslet/atoms/can_flatten.rb +137 -0
  58. data/lib/parslet/atoms/context.rb +94 -0
  59. data/lib/parslet/atoms/dsl.rb +98 -0
  60. data/lib/parslet/atoms/entity.rb +41 -0
  61. data/lib/parslet/atoms/lookahead.rb +49 -0
  62. data/lib/parslet/atoms/named.rb +32 -0
  63. data/lib/parslet/atoms/re.rb +38 -0
  64. data/lib/parslet/atoms/repetition.rb +63 -0
  65. data/lib/parslet/atoms/rule.rb +12 -0
  66. data/lib/parslet/atoms/rule/position.rb +143 -0
  67. data/lib/parslet/atoms/sequence.rb +38 -0
  68. data/lib/parslet/atoms/str.rb +37 -0
  69. data/lib/parslet/atoms/visitor.rb +89 -0
  70. data/lib/parslet/cause.rb +94 -0
  71. data/lib/parslet/convenience.rb +35 -0
  72. data/lib/parslet/error_reporter.rb +7 -0
  73. data/lib/parslet/error_reporter/deepest.rb +95 -0
  74. data/lib/parslet/error_reporter/tree.rb +57 -0
  75. data/lib/parslet/export.rb +162 -0
  76. data/lib/parslet/expression.rb +51 -0
  77. data/lib/parslet/expression/treetop.rb +92 -0
  78. data/lib/parslet/parser.rb +67 -0
  79. data/lib/parslet/pattern.rb +114 -0
  80. data/lib/parslet/pattern/binding.rb +49 -0
  81. data/lib/parslet/rig/rspec.rb +51 -0
  82. data/lib/parslet/slice.rb +101 -0
  83. data/lib/parslet/source.rb +62 -0
  84. data/lib/parslet/source/line_cache.rb +95 -0
  85. data/lib/parslet/transform.rb +236 -0
  86. data/lib/parslet/transform/context.rb +32 -0
  87. metadata +264 -0
@@ -0,0 +1,128 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ #
4
+ # MIT License - (c) 2011 John Mettraux
5
+ #
6
+
7
+ require 'rubygems'
8
+ require 'parslet' # gem install parslet
9
+
10
+
11
# A small JSON reader built from a Parslet grammar (Parser) and a tree
# transformation (Transformer). MyJson.parse ties the two together.
module MyJson

  # Grammar for JSON text. The resulting parse tree uses the keys :number,
  # :string, :array, :object, :entry (with :key and :val), :true, :false and
  # :null.
  class Parser < Parslet::Parser

    rule(:spaces) { match('\s').repeat(1) }
    rule(:spaces?) { spaces.maybe }

    rule(:comma) { spaces? >> str(',') >> spaces? }
    rule(:digit) { match('[0-9]') }

    # Optional sign, integer part without leading zeros, optional fraction,
    # optional exponent.
    rule(:number) {
      (
        str('-').maybe >> (
          str('0') | (match('[1-9]') >> digit.repeat)
        ) >> (
          str('.') >> digit.repeat(1)
        ).maybe >> (
          match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
        ).maybe
      ).as(:number)
    }

    # Double-quoted text; a backslash consumes the character that follows it.
    rule(:string) {
      str('"') >> (
        str('\\') >> any | str('"').absent? >> any
      ).repeat.as(:string) >> str('"')
    }

    rule(:array) {
      str('[') >> spaces? >>
      (value >> (comma >> value).repeat).maybe.as(:array) >>
      spaces? >> str(']')
    }

    rule(:object) {
      str('{') >> spaces? >>
      (entry >> (comma >> entry).repeat).maybe.as(:object) >>
      spaces? >> str('}')
    }

    rule(:value) {
      string | number |
      object | array |
      str('true').as(:true) | str('false').as(:false) |
      str('null').as(:null)
    }

    rule(:entry) {
      (
        string.as(:key) >> spaces? >>
        str(':') >> spaces? >>
        value.as(:val)
      ).as(:entry)
    }

    # Not referenced by any other rule in this grammar.
    rule(:attribute) { (entry | value).as(:attribute) }

    rule(:top) { spaces? >> value >> spaces? }

    root(:top)
  end

  # Turns the parse tree produced by Parser into plain Ruby values.
  class Transformer < Parslet::Transform

    # Intermediate key/value pair; collected into a Hash by the :object rule.
    Entry = Struct.new(:key, :val)

    # The subtree is nil for an empty literal, a single value for one
    # element, or an Array for several.
    # NOTE(review): an array whose only element is itself an array is
    # indistinguishable from that inner array at this point -- confirm
    # whether the grammar should tag elements to remove the ambiguity.
    rule(:array => subtree(:ar)) {
      case ar
      when nil   then []
      when Array then ar
      else            [ ar ]
      end
    }
    rule(:object => subtree(:ob)) {
      entries =
        case ob
        when nil   then []
        when Array then ob
        else            [ ob ]
        end
      entries.each_with_object({}) { |e, h| h[e.key] = e.val }
    }

    # :val is bound with subtree because a nested array or object has
    # already been turned into an Array or Hash, which simple would reject.
    rule(:entry => { :key => simple(:ke), :val => subtree(:va) }) {
      Entry.new(ke, va)
    }

    rule(:string => simple(:st)) {
      st.to_s
    }
    # An empty string literal yields an empty list instead of a slice.
    rule(:string => sequence(:st)) {
      st.join
    }
    # Anything with a fraction or an exponent becomes a Float.
    rule(:number => simple(:nb)) {
      nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
    }

    rule(:null => simple(:nu)) { nil }
    rule(:true => simple(:tr)) { true }
    rule(:false => simple(:fa)) { false }
  end

  # Parses the JSON text +s+ and returns the corresponding Ruby value.
  # The intermediate parse tree is printed to standard output, surrounded by
  # blank lines. Parse errors raised by the parser are not rescued here.
  def self.parse(s)
    tree = Parser.new.parse(s)
    puts; p tree; puts

    Transformer.new.apply(tree)
  end
end
112
+
113
+
114
# Sample document touching every kind of value the grammar knows about.
document = %{
[ 1, 2, 3, null,
"asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
0.1e24, true, false, [ 1 ] ]
}

# What the document has to look like once it has been transformed.
expected = [
  1, 2, 3, nil,
  "asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
  0.1e24, true, false, [ 1 ]
]

result = MyJson.parse(document)

p result
puts

# Fail loudly when the round trip does not produce the expected structure.
raise("MyJson is a failure") unless result == expected
@@ -0,0 +1,34 @@
1
+
2
+ # An exploration of two ideas:
3
+ # a) Constructing a whole parser inline, without the artificial class around
4
+ # it.
5
+ # and:
6
+ # b) Constructing non-greedy or non-blind parsers by transforming the
7
+ # grammar.
8
+
9
+ $:.unshift File.dirname(__FILE__) + "/../lib"
10
+
11
+ require 'parslet'
12
+ include Parslet
13
+
14
# Plain repetition followed by the end pattern; replaced further down by the
# recursive formulation.
blind = str('a').repeat >> str('aa')

# E1% E2
#
# S = E2 | E1 S

# Wraps a block in a named entity, so that a grammar can refer to itself
# before it has been fully built.
def this(name, &block)
  Parslet::Atoms::Entity.new(name, &block)
end

# Matches only where no further input is available.
def epsilon
  any.absent?
end

# Traditional repetition will try as long as the pattern can be matched and
# then give up. This is greedy and blind.
blind_step = str('a').as(:e) >> this('a') { blind }.as(:rec)
blind = blind_step | epsilon

# Here's a pattern match that is greedy and non-blind. The first pattern
# 'a'* will be tried as many times as possible, while still matching the
# end pattern 'aa'.
non_blind = nil
tail_match = str('aa').as(:e2) >> epsilon
one_more = str('a').as(:e1) >> this('b') { non_blind }.as(:rec)
non_blind = tail_match | one_more

p blind.parse('aaaa')
p non_blind
p non_blind.parse('aaaa')
@@ -0,0 +1,44 @@
1
+ # Demonstrates that we have a compatibility fix to mathn's weird idea of
2
+ # integer mathematics.
3
+ # This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
4
+
5
+ $:.unshift File.dirname(__FILE__) + "/../lib"
6
+
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+ include Parslet
10
+
11
# Builds a tiny grammar for a parenthesized cephalopod name and runs it on an
# input that does not match. Returns nil when the parse fails.
def attempt_parse
  ws = match['\s'].repeat
  animal = str('octopus') | str('squid')
  wrapped_animal = str('(') >> ws >> animal >> ws >> str(')')
  grammar = ws >> wrapped_animal >> ws

  begin
    # This parse fails, but that is not the point. When mathn is in the
    # current ruby environment, it modifies integer division in a way that
    # makes parslet loop indefinitely.
    grammar.parse %{(\nsqeed)\n}
  rescue Parslet::ParseFailed
    nil
  end
end
36
+
37
# First run: mathn has not been loaded yet.
attempt_parse
puts 'it terminates before we require mathn'

puts "requiring mathn now"
# mathn is no longer part of the standard library on newer Rubies (it was
# removed in Ruby 2.5), so a missing library must not abort the example.
mathn_loaded =
  begin
    require 'mathn'
    true
  rescue LoadError
    false
  end

if mathn_loaded
  puts "and trying again (will hang without the fix)"
  # Without the compatibility fix this second call would never return.
  attempt_parse
  puts "okay!"
else
  puts "mathn is not available on this Ruby; nothing left to check"
end
@@ -0,0 +1,94 @@
1
+ # Reproduces [1] using parslet.
2
+ # [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
3
+
4
+ $:.unshift File.dirname(__FILE__) + "/../lib"
5
+
6
+ require 'pp'
7
+ require 'parslet'
8
+ require 'parslet/convenience'
9
+
10
+ module MiniLisp
11
+ class Parser < Parslet::Parser
12
+ root :expression
13
+ rule(:expression) {
14
+ space? >> str('(') >> space? >> body >> str(')') >> space?
15
+ }
16
+
17
+ rule(:body) {
18
+ (expression | identifier | float | integer | string).repeat.as(:exp)
19
+ }
20
+
21
+ rule(:space) {
22
+ match('\s').repeat(1)
23
+ }
24
+ rule(:space?) {
25
+ space.maybe
26
+ }
27
+
28
+ rule(:identifier) {
29
+ (match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat).as(:identifier) >> space?
30
+ }
31
+
32
+ rule(:float) {
33
+ (
34
+ integer >> (
35
+ str('.') >> match('[0-9]').repeat(1) |
36
+ str('e') >> match('[0-9]').repeat(1)
37
+ ).as(:e)
38
+ ).as(:float) >> space?
39
+ }
40
+
41
+ rule(:integer) {
42
+ ((str('+') | str('-')).maybe >> match("[0-9]").repeat(1)).as(:integer) >> space?
43
+ }
44
+
45
+ rule(:string) {
46
+ str('"') >> (
47
+ str('\\') >> any |
48
+ str('"').absent? >> any
49
+ ).repeat.as(:string) >> str('"') >> space?
50
+ }
51
+ end
52
+
53
+ class Transform
54
+ include Parslet
55
+
56
+ attr_reader :t
57
+ def initialize
58
+ @t = Parslet::Transform.new
59
+
60
+ # To understand these, take a look at what comes out of the parser.
61
+ t.rule(:identifier => simple(:ident)) { ident.to_sym }
62
+
63
+ t.rule(:string => simple(:str)) { str }
64
+
65
+ t.rule(:integer => simple(:int)) { Integer(int) }
66
+
67
+ t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }
68
+
69
+ t.rule(:exp => subtree(:exp)) { exp }
70
+ end
71
+
72
+ def do(tree)
73
+ t.apply(tree)
74
+ end
75
+ end
76
+ end
77
+
78
# Program text fed to the parser.
program = %Q{
(define test (lambda ()
(begin
(display "something")
(display 1)
(display 3.08))))
}

tree = MiniLisp::Parser.new.parse_with_debug(program)

# Transform the result, provided the parse produced one.
pp MiniLisp::Transform.new.do(tree) if tree
91
+
92
+ # Thereby reducing it to the earlier problem:
93
+ # http://github.com/kschiess/toylisp
94
+
@@ -0,0 +1,47 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'pp'
4
+ require "parslet"
5
+
6
+ # Demonstrates modular parsers, split out over many classes. Please look at
7
+ # ip_address.rb as well.
8
+
9
# A mixin that contributes a single rule, :a_language, to whatever includes
# it.
module ALanguage
  include Parslet

  # Rules behave like ordinary methods, so they can live in a module and be
  # mixed into parser classes.
  rule(:a_language) do
    str('aaa')
  end
end
16
+
17
+ # Parslet parsers are parslet atoms as well. Create an instance and chain them
18
+ # to your other rules.
19
+ #
20
# A complete parser of its own, accepting exactly 'bbb'. Instances can be
# chained into other grammars like any other atom.
class BLanguage < Parslet::Parser
  root(:blang)

  rule(:blang) do
    str('bbb')
  end
end
25
+
26
# An atom is an ordinary Ruby object: build it once here and hand it to the
# parser that needs it.
c_language = Parslet.str("ccc")
28
+
29
# Combines three ways of reusing grammar pieces: a rule mixed in from
# ALanguage, an instance of the BLanguage parser, and an atom handed to the
# constructor.
class Language < Parslet::Parser
  # c_language: the atom used between 'c(' and ')'.
  def initialize(c_language)
    @c_language = c_language
    super()
  end

  # Must stay ahead of rule(:root) below: both define a method called root,
  # and the rule has to be the one that remains.
  root :root

  include ALanguage

  rule(:root) {
    # prefix, inner grammar, closing parenthesis, optional trailing blank
    wrapped = lambda { |prefix, inner| str(prefix) >> inner >> str(')') >> space }

    wrapped.call('a(', a_language) |
      wrapped.call('b(', BLanguage.new) |
      wrapped.call('c(', @c_language)
  }
  rule(:space) { str(' ').maybe }
end
44
+
45
# One input per alternative of the root rule; a fresh parser is built for
# each of them.
['a(aaa)', 'b(bbb)', 'c(ccc)'].each do |input|
  Language.new(c_language).parse(input)
end
@@ -0,0 +1,132 @@
1
+ $:.unshift File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'parslet'
4
+ require 'parslet/convenience'
5
+
6
+ # This example demonstrates tree error reporting in a real life example.
7
+ # The parser code has been contributed by John Mettraux.
8
+
9
+ def prettify(str)
10
+ puts " "*3 + " "*4 + "." + " "*4 + "10" + " "*3 + "." + " "*4 + "20"
11
+ str.lines.each_with_index do |line, index|
12
+ printf "%02d %s\n",
13
+ index+1,
14
+ line.chomp
15
+ end
16
+ end
17
+
18
+ class Parser < Parslet::Parser
19
+
20
+ # commons
21
+
22
+ rule(:space) { match('[ \t]').repeat(1) }
23
+ rule(:space?) { space.maybe }
24
+
25
+ rule(:newline) { match('[\r\n]') }
26
+
27
+ rule(:comment) { str('#') >> match('[^\r\n]').repeat }
28
+
29
+ rule(:line_separator) {
30
+ (space? >> ((comment.maybe >> newline) | str(';')) >> space?).repeat(1)
31
+ }
32
+
33
+ rule(:blank) { line_separator | space }
34
+ rule(:blank?) { blank.maybe }
35
+
36
+ rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }
37
+
38
+ # res_statement
39
+
40
+ rule(:reference) {
41
+ (str('@').repeat(1,2) >> identifier).as(:reference)
42
+ }
43
+
44
+ rule(:res_action_or_link) {
45
+ str('.').as(:dot) >> (identifier >> str('?').maybe ).as(:name) >> str('()')
46
+ }
47
+
48
+ rule(:res_actions) {
49
+ (
50
+ reference
51
+ ).as(:resources) >>
52
+ (
53
+ res_action_or_link.as(:res_action)
54
+ ).repeat(0).as(:res_actions)
55
+ }
56
+
57
+ rule(:res_statement) {
58
+ res_actions >>
59
+ (str(':') >> identifier.as(:name)).maybe.as(:res_field)
60
+ }
61
+
62
+ # expression
63
+
64
+ rule(:expression) {
65
+ res_statement
66
+ }
67
+
68
+ # body
69
+
70
+ rule(:body) {
71
+ (line_separator >> (block | expression)).repeat(1).as(:body) >>
72
+ line_separator
73
+ }
74
+
75
+ # blocks
76
+
77
+ rule(:begin_block) {
78
+ (str('concurrent').as(:type) >> space).maybe.as(:pre) >>
79
+ str('begin').as(:begin) >>
80
+ body >>
81
+ str('end')
82
+ }
83
+
84
+ rule(:define_block) {
85
+ str('define').as(:define) >> space >>
86
+ identifier.as(:name) >> str('()') >>
87
+ body >>
88
+ str('end')
89
+ }
90
+
91
+ rule(:block) {
92
+ define_block | begin_block
93
+ }
94
+
95
+ # root
96
+
97
+ rule(:radix) {
98
+ line_separator.maybe >> block >> line_separator.maybe
99
+ }
100
+
101
+ root(:radix)
102
+ end
103
+
104
+
105
+ ds = [
106
+ %{
107
+ define f()
108
+ @res.name
109
+ end
110
+ },
111
+ %{
112
+ define f()
113
+ begin
114
+ @res.name
115
+ end
116
+ end
117
+ }
118
+ ]
119
+
120
+ ds.each do |d|
121
+
122
+ puts '-' * 80
123
+ prettify(d)
124
+
125
+ parser = Parser.new
126
+
127
+ begin
128
+ parser.parse_with_debug(d)
129
+ end
130
+ end
131
+
132
+ puts '-' * 80