ghazel-parslet 1.4.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
data/example/json.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
#
|
4
|
+
# MIT License - (c) 2011 John Mettraux
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'parslet' # gem install parslet
|
9
|
+
|
10
|
+
|
11
|
+
module MyJson
|
12
|
+
|
13
|
+
# Grammar for JSON text. Each rule is a parslet atom; :top is the root rule.
class Parser < Parslet::Parser
  # Whitespace: one or more, and the optional variant.
  rule(:spaces)  { match('\s').repeat(1) }
  rule(:spaces?) { spaces.maybe }

  # Primitives shared by the rules below.
  rule(:comma) { spaces? >> str(',') >> spaces? }
  rule(:digit) { match('[0-9]') }

  # Optional minus sign, integer part, optional fraction, optional exponent.
  # The complete match is captured under :number.
  rule(:number) do
    sign      = str('-').maybe
    int_part  = str('0') | (match('[1-9]') >> digit.repeat)
    fraction  = (str('.') >> digit.repeat(1)).maybe
    exponent  = (match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)).maybe

    (sign >> int_part >> fraction >> exponent).as(:number)
  end

  # Double-quoted text. A backslash consumes the character that follows it;
  # anything else is accepted as long as it is not the closing quote.
  rule(:string) do
    str('"') >> (
      str('\\') >> any | str('"').absent? >> any
    ).repeat.as(:string) >> str('"')
  end

  # Comma-separated values between square brackets, captured under :array.
  rule(:array) do
    elements = (value >> (comma >> value).repeat).maybe.as(:array)
    str('[') >> spaces? >> elements >> spaces? >> str(']')
  end

  # Comma-separated entries between curly braces, captured under :object.
  rule(:object) do
    members = (entry >> (comma >> entry).repeat).maybe.as(:object)
    str('{') >> spaces? >> members >> spaces? >> str('}')
  end

  # Any JSON value; the keyword literals are tagged with their own name.
  rule(:value) do
    string | number |
      object | array |
      str('true').as(:true) | str('false').as(:false) |
      str('null').as(:null)
  end

  # A single "key": value pair inside an object.
  rule(:entry) do
    (
      string.as(:key) >> spaces? >>
      str(':') >> spaces? >>
      value.as(:val)
    ).as(:entry)
  end

  rule(:attribute) { (entry | value).as(:attribute) }

  # A document is one value with optional surrounding whitespace.
  rule(:top) { spaces? >> value >> spaces? }

  root(:top)
end
|
72
|
+
|
73
|
+
# Rewrites the tagged tree produced by the parser into plain Ruby values.
class Transformer < Parslet::Transform

  # Key/value pair produced by the :entry rule and consumed by the :object
  # rule. Assigned directly instead of subclassing an anonymous Struct, which
  # would leave a superfluous unnamed class in the ancestor chain.
  Entry = Struct.new(:key, :val)

  # A subtree that is not already an Array is wrapped so the result always is.
  rule(:array => subtree(:ar)) {
    ar.is_a?(Array) ? ar : [ ar ]
  }

  # Collects the entries (wrapped the same way as above) into a Hash.
  rule(:object => subtree(:ob)) {
    entries = ob.is_a?(Array) ? ob : [ ob ]
    entries.each_with_object({}) { |e, acc| acc[e.key] = e.val }
  }

  rule(:entry => { :key => simple(:ke), :val => simple(:va) }) {
    Entry.new(ke, va)
  }

  rule(:string => simple(:st)) {
    st.to_s
  }

  # A decimal point or an exponent marker selects Float, otherwise Integer.
  rule(:number => simple(:nb)) {
    nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
  }

  rule(:null => simple(:nu)) { nil }
  rule(:true => simple(:tr)) { true }
  rule(:false => simple(:fa)) { false }
end
|
99
|
+
|
100
|
+
# Parses the JSON text +s+ and returns the transformed Ruby value.
# The intermediate parse tree is printed to stdout, framed by blank lines.
def self.parse(s)
  tree = Parser.new.parse(s)

  puts
  p tree
  puts

  Transformer.new.apply(tree)
end
|
111
|
+
end
|
112
|
+
|
113
|
+
|
114
|
+
# Sample document fed to the parser.
s = %{
[ 1, 2, 3, null,
"asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
0.1e24, true, false, [ 1 ] ]
}

out = MyJson.parse(s)

p out
puts

# The Ruby value the sample document is expected to turn into.
expected = [
  1, 2, 3, nil,
  "asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
  0.1e24, true, false, [ 1 ]
]

raise("MyJson is a failure") unless out == expected
|
data/example/local.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
# An exploration of two ideas:
|
3
|
+
# a) Constructing a whole parser inline, without the artificial class around
|
4
|
+
# it.
|
5
|
+
# and:
|
6
|
+
# b) Constructing non-greedy or non-blind parsers by transforming the
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
|
+
|
11
|
+
require 'parslet'
|
12
|
+
include Parslet
|
13
|
+
|
14
|
+
# Plain repetition followed by 'aa'. The name is rebound further down.
a = str('a').repeat >> str('aa')

# E1% E2
#
# S = E2 | E1 S

# Wraps +block+ in a named entity; the uses below pass a block that returns
# the very atom being defined, which makes the definition recursive.
def this(name, &block)
  Parslet::Atoms::Entity.new(name, &block)
end

# Negative lookahead on "any character".
def epsilon
  any.absent?
end

# Traditional repetition will try as long as the pattern can be matched and
# then give up. This is greedy and blind.
a = str('a').as(:e) >> this('a') { a }.as(:rec) | epsilon

# Here's a pattern match that is greedy and non-blind. The first pattern
# 'a'* will be tried as many times as possible, while still matching the
# end pattern 'aa'.
b = str('aa').as(:e2) >> epsilon | str('a').as(:e1) >> this('b') { b }.as(:rec)

p a.parse('aaaa')
p b
p b.parse('aaaa')
|
data/example/mathn.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Demonstrates that we have a compatibility fix to mathn's weird idea of
|
2
|
+
# integer mathematics.
|
3
|
+
# This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
|
4
|
+
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
|
+
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
include Parslet
|
10
|
+
|
11
|
+
# Builds a small inline grammar for a parenthesized cephalopod name and runs
# it on input that does not match. Parslet::ParseFailed is swallowed, so the
# method simply returns once the parse attempt is over.
def attempt_parse
  ws      = match['\s'].repeat
  animal  = str('octopus') | str('squid')
  wrapped = str('(') >> ws >> animal >> ws >> str(')')
  grammar = ws >> wrapped >> ws

  # This parse fails, but that is not the point. When mathn is in the current
  # ruby environment, it modifies integer division in a way that makes
  # parslet loop indefinitely.
  grammar.parse %{(\nsqeed)\n}
rescue Parslet::ParseFailed
  # Expected: the input is deliberately invalid.
end
|
36
|
+
|
37
|
+
# Run once without mathn loaded, as a baseline.
attempt_parse
puts 'it terminates before we require mathn'

puts "requiring mathn now"
begin
  require 'mathn'
rescue LoadError
  # mathn is no longer part of the standard library on Ruby 2.5 and later.
  # Without it there is nothing left to demonstrate, so stop cleanly instead
  # of crashing with a backtrace.
  warn "mathn is not available on this Ruby; skipping the second attempt"
  exit
end

puts "and trying again (will hang without the fix)"
attempt_parse # terminates as well, thanks to the compatibility fix
puts "okay!"
|
data/example/minilisp.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Reproduces [1] using parslet.
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
|
+
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
|
10
|
+
module MiniLisp
|
11
|
+
# Grammar for a tiny Lisp: nested parenthesized lists of identifiers,
# numbers and strings.
class Parser < Parslet::Parser
  root :expression

  # A parenthesized body with optional whitespace around and inside it.
  rule(:expression) do
    space? >> str('(') >> space? >> body >> str(')') >> space?
  end

  # Zero or more items, captured under :exp.
  rule(:body) do
    (expression | identifier | float | integer | string).repeat.as(:exp)
  end

  rule(:space)  { match('\s').repeat(1) }
  rule(:space?) { space.maybe }

  rule(:identifier) do
    name = match('[a-zA-Z=*]') >> match('[a-zA-Z=*_]').repeat
    name.as(:identifier) >> space?
  end

  # An integer followed by either a fractional part or an exponent; the
  # trailing part is captured under :e.
  rule(:float) do
    (
      integer >> (
        str('.') >> match('[0-9]').repeat(1) |
        str('e') >> match('[0-9]').repeat(1)
      ).as(:e)
    ).as(:float) >> space?
  end

  rule(:integer) do
    sign = (str('+') | str('-')).maybe
    (sign >> match("[0-9]").repeat(1)).as(:integer) >> space?
  end

  # Double-quoted text. A backslash consumes the character that follows it.
  rule(:string) do
    str('"') >> (
      str('\\') >> any |
      str('"').absent? >> any
    ).repeat.as(:string) >> str('"') >> space?
  end
end
|
52
|
+
|
53
|
+
# Wraps a Parslet::Transform instance configured with the rules that turn
# the parser output into Ruby values.
class Transform
  include Parslet

  attr_reader :t

  def initialize
    @t = Parslet::Transform.new
    install_rules
  end

  # Applies the configured rules to +tree+ and returns the result.
  def do(tree)
    t.apply(tree)
  end

  private

  # Registers the transformation rules on the wrapped transform.
  # To understand these, take a look at what comes out of the parser.
  def install_rules
    t.rule(:identifier => simple(:ident)) { ident.to_sym }

    t.rule(:string => simple(:str)) { str }

    t.rule(:integer => simple(:int)) { Integer(int) }

    t.rule(:float=>{:integer=> simple(:a), :e=> simple(:b)}) { Float(a + b) }

    t.rule(:exp => subtree(:exp)) { exp }
  end
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Program text handed to the parser.
source = %Q{
(define test (lambda ()
(begin
(display "something")
(display 1)
(display 3.08))))
}

parser = MiniLisp::Parser.new
transform = MiniLisp::Transform.new

result = parser.parse_with_debug(source)

# Transform the result
pp transform.do(result) if result
|
91
|
+
|
92
|
+
# Thereby reducing it to the earlier problem:
|
93
|
+
# http://github.com/kschiess/toylisp
|
94
|
+
|
data/example/modularity.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require "parslet"
|
5
|
+
|
6
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
7
|
+
# ip_address.rb as well.
|
8
|
+
|
9
|
+
# Mixin that contributes the :a_language rule to whatever includes it.
module ALanguage
  include Parslet

  # Parslet rules are really a special kind of method. Mix them into your
  # classes!
  rule(:a_language) do
    str('aaa')
  end
end
|
16
|
+
|
17
|
+
# Parslet parsers are parslet atoms as well. Create an instance and chain them
|
18
|
+
# to your other rules.
|
19
|
+
#
|
20
|
+
# Stand-alone parser for the literal 'bbb'.
class BLanguage < Parslet::Parser
  root :blang

  rule(:blang) do
    str('bbb')
  end
end
|
25
|
+
|
26
|
+
# Parslet atoms are really Ruby values, pass them around.
|
27
|
+
c_language = Parslet.str('ccc')

# Parser that combines three sources of grammar: a rule mixed in from
# ALanguage, an instance of BLanguage, and an atom handed to the constructor.
class Language < Parslet::Parser
  def initialize(c_language)
    @c_language = c_language
    super()
  end

  root :root

  include ALanguage

  # One branch per sub-language, each wrapped in its own "x( ... )" marker.
  rule(:root) do
    a_branch = str('a(') >> a_language >> str(')') >> space
    b_branch = str('b(') >> BLanguage.new >> str(')') >> space
    c_branch = str('c(') >> @c_language >> str(')') >> space

    a_branch | b_branch | c_branch
  end
  rule(:space) { str(' ').maybe }
end

# Exercise every branch with a freshly built parser.
['a(aaa)', 'b(bbb)', 'c(ccc)'].each do |input|
  Language.new(c_language).parse(input)
end
|
data/example/nested_errors.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
require 'parslet/convenience'
|
5
|
+
|
6
|
+
# This example demonstrates tree error reporting in a real life example.
|
7
|
+
# The parser code has been contributed by John Mettraux.
|
8
|
+
|
9
|
+
# Prints +str+ to stdout with a column ruler on top and a two-digit,
# one-based line number in front of every line.
def prettify(str)
  margin = " " * 3
  gap = " " * 4
  puts margin + gap + "." + gap + "10" + margin + "." + gap + "20"

  str.each_line.with_index(1) do |text, number|
    printf "%02d %s\n", number, text.chomp
  end
end
|
17
|
+
|
18
|
+
# Grammar for a small block language: `define`/`begin` blocks whose bodies
# hold resource statements or further blocks, separated by newlines or ';'.
class Parser < Parslet::Parser

  # commons

  rule(:space)  { match('[ \t]').repeat(1) }
  rule(:space?) { space.maybe }

  rule(:newline) { match('[\r\n]') }

  rule(:comment) { str('#') >> match('[^\r\n]').repeat }

  # One or more terminators: an optionally commented line end, or ';'.
  rule(:line_separator) do
    terminator = (comment.maybe >> newline) | str(';')
    (space? >> terminator >> space?).repeat(1)
  end

  rule(:blank)  { line_separator | space }
  rule(:blank?) { blank.maybe }

  rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }

  # res_statement

  # One or two '@' characters followed by an identifier.
  rule(:reference) do
    (str('@').repeat(1,2) >> identifier).as(:reference)
  end

  # ".name()" or ".name?()".
  rule(:res_action_or_link) do
    called = (identifier >> str('?').maybe ).as(:name)
    str('.').as(:dot) >> called >> str('()')
  end

  # A reference followed by any number of chained actions.
  rule(:res_actions) do
    reference.as(:resources) >>
      res_action_or_link.as(:res_action).repeat(0).as(:res_actions)
  end

  # Actions with an optional ":field" suffix.
  rule(:res_statement) do
    field = (str(':') >> identifier.as(:name)).maybe.as(:res_field)
    res_actions >> field
  end

  # expression

  rule(:expression) { res_statement }

  # body

  # At least one separated block or expression, then a closing separator.
  rule(:body) do
    statements = (line_separator >> (block | expression)).repeat(1).as(:body)
    statements >> line_separator
  end

  # blocks

  rule(:begin_block) do
    prefix = (str('concurrent').as(:type) >> space).maybe.as(:pre)
    prefix >> str('begin').as(:begin) >> body >> str('end')
  end

  rule(:define_block) do
    str('define').as(:define) >> space >>
      identifier.as(:name) >> str('()') >>
      body >>
      str('end')
  end

  rule(:block) { define_block | begin_block }

  # root

  rule(:radix) do
    line_separator.maybe >> block >> line_separator.maybe
  end

  root(:radix)
end
|
103
|
+
|
104
|
+
|
105
|
+
# Sample inputs handed to the parser, one after the other.
ds = [
  %{
define f()
@res.name
end
},
  %{
define f()
begin
@res.name
end
end
}
]

ds.each do |definition|
  puts '-' * 80
  prettify(definition)

  # A fresh parser per input; parse_with_debug does its own reporting.
  Parser.new.parse_with_debug(definition)
end

puts '-' * 80
|