ghazel-parslet 1.4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +195 -0
- data/LICENSE +23 -0
- data/README +70 -0
- data/Rakefile +49 -0
- data/example/boolean_algebra.rb +70 -0
- data/example/calc.rb +153 -0
- data/example/comments.rb +35 -0
- data/example/deepest_errors.rb +131 -0
- data/example/documentation.rb +18 -0
- data/example/email_parser.rb +52 -0
- data/example/empty.rb +13 -0
- data/example/erb.rb +47 -0
- data/example/ignore.rb +33 -0
- data/example/ip_address.rb +125 -0
- data/example/json.rb +128 -0
- data/example/local.rb +34 -0
- data/example/mathn.rb +44 -0
- data/example/minilisp.rb +94 -0
- data/example/modularity.rb +47 -0
- data/example/nested_errors.rb +132 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/calc.out +1 -0
- data/example/output/comments.out +8 -0
- data/example/output/deepest_errors.out +54 -0
- data/example/output/documentation.err +4 -0
- data/example/output/documentation.out +1 -0
- data/example/output/email_parser.out +2 -0
- data/example/output/empty.err +1 -0
- data/example/output/erb.out +7 -0
- data/example/output/ignore.out +1 -0
- data/example/output/ignore_whitespace.out +1 -0
- data/example/output/ip_address.out +9 -0
- data/example/output/json.out +5 -0
- data/example/output/local.out +3 -0
- data/example/output/mathn.out +4 -0
- data/example/output/minilisp.out +5 -0
- data/example/output/modularity.out +0 -0
- data/example/output/nested_errors.out +54 -0
- data/example/output/parens.out +8 -0
- data/example/output/readme.out +1 -0
- data/example/output/seasons.out +28 -0
- data/example/output/sentence.out +1 -0
- data/example/output/simple_xml.out +2 -0
- data/example/output/string_parser.out +3 -0
- data/example/parens.rb +42 -0
- data/example/readme.rb +30 -0
- data/example/seasons.rb +46 -0
- data/example/sentence.rb +36 -0
- data/example/simple.lit +3 -0
- data/example/simple_xml.rb +54 -0
- data/example/string_parser.rb +77 -0
- data/example/test.lit +4 -0
- data/lib/parslet.rb +254 -0
- data/lib/parslet/atoms.rb +32 -0
- data/lib/parslet/atoms/alternative.rb +50 -0
- data/lib/parslet/atoms/base.rb +124 -0
- data/lib/parslet/atoms/can_flatten.rb +137 -0
- data/lib/parslet/atoms/context.rb +94 -0
- data/lib/parslet/atoms/dsl.rb +98 -0
- data/lib/parslet/atoms/entity.rb +41 -0
- data/lib/parslet/atoms/lookahead.rb +49 -0
- data/lib/parslet/atoms/named.rb +32 -0
- data/lib/parslet/atoms/re.rb +38 -0
- data/lib/parslet/atoms/repetition.rb +63 -0
- data/lib/parslet/atoms/rule.rb +12 -0
- data/lib/parslet/atoms/rule/position.rb +143 -0
- data/lib/parslet/atoms/sequence.rb +38 -0
- data/lib/parslet/atoms/str.rb +37 -0
- data/lib/parslet/atoms/visitor.rb +89 -0
- data/lib/parslet/cause.rb +94 -0
- data/lib/parslet/convenience.rb +35 -0
- data/lib/parslet/error_reporter.rb +7 -0
- data/lib/parslet/error_reporter/deepest.rb +95 -0
- data/lib/parslet/error_reporter/tree.rb +57 -0
- data/lib/parslet/export.rb +162 -0
- data/lib/parslet/expression.rb +51 -0
- data/lib/parslet/expression/treetop.rb +92 -0
- data/lib/parslet/parser.rb +67 -0
- data/lib/parslet/pattern.rb +114 -0
- data/lib/parslet/pattern/binding.rb +49 -0
- data/lib/parslet/rig/rspec.rb +51 -0
- data/lib/parslet/slice.rb +101 -0
- data/lib/parslet/source.rb +62 -0
- data/lib/parslet/source/line_cache.rb +95 -0
- data/lib/parslet/transform.rb +236 -0
- data/lib/parslet/transform/context.rb +32 -0
- metadata +264 -0
data/example/json.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
#
|
4
|
+
# MIT License - (c) 2011 John Mettraux
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'parslet' # gem install parslet
|
9
|
+
|
10
|
+
|
11
|
+
# A small JSON reader built on Parslet: Parser turns text into a tree of
# tagged hashes, Transformer folds that tree into plain Ruby values, and
# MyJson.parse chains the two.
module MyJson

  # Grammar for JSON documents. The root rule is :top.
  class Parser < Parslet::Parser

    rule(:spaces) { match('\s').repeat(1) }
    rule(:spaces?) { spaces.maybe }

    rule(:comma) { spaces? >> str(',') >> spaces? }
    rule(:digit) { match('[0-9]') }

    # Optional minus sign, an integer part without leading zeros, then an
    # optional fraction and an optional exponent. The whole match is
    # captured as one :number slice.
    rule(:number) {
      (
        str('-').maybe >> (
          str('0') | (match('[1-9]') >> digit.repeat)
        ) >> (
          str('.') >> digit.repeat(1)
        ).maybe >> (
          match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
        ).maybe
      ).as(:number)
    }

    # A backslash always consumes the character after it, so an escaped
    # double quote does not terminate the string. Only the text between the
    # quotes is captured.
    rule(:string) {
      str('"') >> (
        (str('\\') >> any) | (str('"').absent? >> any)
      ).repeat.as(:string) >> str('"')
    }

    rule(:array) {
      str('[') >> spaces? >>
      (value >> (comma >> value).repeat).maybe.as(:array) >>
      spaces? >> str(']')
    }

    rule(:object) {
      str('{') >> spaces? >>
      (entry >> (comma >> entry).repeat).maybe.as(:object) >>
      spaces? >> str('}')
    }

    rule(:value) {
      string | number |
      object | array |
      str('true').as(:true) | str('false').as(:false) |
      str('null').as(:null)
    }

    # A single "key": value pair inside an object.
    rule(:entry) {
      (
        string.as(:key) >> spaces? >>
        str(':') >> spaces? >>
        value.as(:val)
      ).as(:entry)
    }

    rule(:attribute) { (entry | value).as(:attribute) }

    rule(:top) { spaces? >> value >> spaces? }

    root(:top)
  end

  # Converts the tree produced by Parser into Ruby arrays, hashes, strings,
  # numbers, true/false and nil.
  class Transformer < Parslet::Transform

    # Intermediate holder for one object member until the enclosing
    # :object rule collects it into a Hash.
    class Entry < Struct.new(:key, :val); end

    # A lone element arrives unwrapped, so it is wrapped here.
    # NOTE(review): the element list is optional in the grammar, so an empty
    # `[]` presumably arrives as nil and becomes [nil], which this rule
    # cannot tell apart from `[null]` -- confirm, and fix in the grammar if
    # empty arrays matter.
    rule(:array => subtree(:ar)) {
      ar.is_a?(Array) ? ar : [ ar ]
    }

    # Builds a Hash from the collected Entry values. The member list is
    # optional in the grammar, so an empty `{}` yields nil here; treat that
    # as "no entries" instead of calling #key on nil.
    rule(:object => subtree(:ob)) {
      entries =
        case ob
        when nil then []
        when Array then ob
        else [ ob ]
        end
      entries.each_with_object({}) { |e, h| h[e.key] = e.val }
    }

    rule(:entry => { :key => simple(:ke), :val => simple(:va) }) {
      Entry.new(ke, va)
    }

    # Escape sequences are returned as written; they are not decoded.
    rule(:string => simple(:st)) {
      st.to_s
    }

    # A fraction or exponent marker makes the number a Float.
    rule(:number => simple(:nb)) {
      nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
    }

    rule(:null => simple(:nu)) { nil }
    rule(:true => simple(:tr)) { true }
    rule(:false => simple(:fa)) { false }
  end

  # Parses the JSON text +s+ and returns the corresponding Ruby value.
  #
  # The intermediate parse tree is printed to standard output, surrounded by
  # blank lines, before it is transformed. Errors raised by the parser are
  # not rescued here.
  def self.parse(s)
    tree = Parser.new.parse(s)
    puts; p tree; puts

    Transformer.new.apply(tree)
  end
end
|
112
|
+
|
113
|
+
|
114
|
+
# Sample document covering every value type the grammar knows about.
s = %{
[ 1, 2, 3, null,
"asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
0.1e24, true, false, [ 1 ] ]
}

out = MyJson.parse(s)

p out; puts

# The Ruby value the sample document is expected to turn into.
expected = [
  1, 2, 3, nil,
  "asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
  0.1e24, true, false, [ 1 ]
]

raise("MyJson is a failure") unless out == expected
|
data/example/local.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
|
2
|
+
# An exploration of two ideas:
|
3
|
+
# a) Constructing a whole parser inline, without the artificial class around
|
4
|
+
# it.
|
5
|
+
# and:
|
6
|
+
# b) Constructing non-greedy or non-blind parsers by transforming the
|
7
|
+
# grammar.
|
8
|
+
|
9
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
10
|
+
|
11
|
+
require 'parslet'
|
12
|
+
include Parslet
|
13
|
+
|
14
|
+
# Plain repetition followed by the end pattern. This value is replaced by
# the recursive definition of `a` further down.
a = str('a').repeat >> str('aa')

# E1% E2
#
# S = E2 | E1 S

# Wraps the block in a Parslet::Atoms::Entity called +name+, which lets a
# parser refer to itself from inside its own definition.
def this(name, &block)
  Parslet::Atoms::Entity.new(name, &block)
end

# Negative lookahead on `any`.
def epsilon
  any.absent?
end

# Traditional repetition will try as long as the pattern can be matched and
# then give up. This is greedy and blind.
a = (str('a').as(:e) >> this('a') { a }.as(:rec)) |
    epsilon

# Here's a pattern match that is greedy and non-blind. The first pattern
# 'a'* will be tried as many times as possible, while still matching the
# end pattern 'aa'.
b = (str('aa').as(:e2) >> epsilon) |
    (str('a').as(:e1) >> this('b') { b }.as(:rec))

p(a.parse('aaaa'))
p(b)
p(b.parse('aaaa'))
|
data/example/mathn.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Demonstrates that we have a compatibility fix to mathn's weird idea of
|
2
|
+
# integer mathematics.
|
3
|
+
# This was contributed by Jonathan Hinkle (https://github.com/hynkle). Thanks!
|
4
|
+
|
5
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
6
|
+
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
include Parslet
|
10
|
+
|
11
|
+
# Builds a small "parenthesized cephalopod" grammar and runs it against an
# input that does not match it. A Parslet::ParseFailed error is swallowed on
# purpose, in which case nil is returned.
def attempt_parse
  ws = match['\s'].repeat

  animal = str('octopus') | str('squid')

  wrapped = str('(') >> ws >> animal >> ws >> str(')')

  grammar = ws >> wrapped >> ws

  # This parse fails, but that is not the point. When mathn is in the current
  # ruby environment, it modifies integer division in a way that makes
  # parslet loop indefinitely.
  grammar.parse(%{(\nsqeed)\n})
rescue Parslet::ParseFailed
  # Expected: only termination matters here.
end
|
36
|
+
|
37
|
+
# First run, before mathn is loaded.
attempt_parse
puts 'it terminates before we require mathn'

puts "requiring mathn now"
# mathn is no longer bundled with newer Rubies, so the require can raise
# LoadError. Report that instead of crashing with a backtrace.
mathn_available =
  begin
    require 'mathn'
    true
  rescue LoadError
    false
  end

if mathn_available
  puts "and trying again (will hang without the fix)"
  attempt_parse # terminates as well, thanks to the compatibility fix
  puts "okay!"
else
  warn "mathn is not available in this Ruby; skipping the second attempt"
end
|
data/example/minilisp.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
# Reproduces [1] using parslet.
|
2
|
+
# [1] http://thingsaaronmade.com/blog/a-quick-intro-to-writing-a-parser-using-treetop.html
|
3
|
+
|
4
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
5
|
+
|
6
|
+
require 'pp'
|
7
|
+
require 'parslet'
|
8
|
+
require 'parslet/convenience'
|
9
|
+
|
10
|
+
# A miniature Lisp reader: a Parslet grammar plus a transform that turns the
# parse tree into Ruby symbols, numbers and nested arrays.
module MiniLisp
  # Grammar for parenthesized expressions.
  class Parser < Parslet::Parser
    root :expression

    rule(:expression) do
      space? >> str('(') >> space? >> body >> str(')') >> space?
    end

    # Any number of nested expressions or atoms, captured as :exp.
    rule(:body) do
      (expression | identifier | float | integer | string).repeat.as(:exp)
    end

    rule(:space) { match('\s').repeat(1) }
    rule(:space?) { space.maybe }

    rule(:identifier) do
      first_char = match('[a-zA-Z=*]')
      more_chars = match('[a-zA-Z=*_]').repeat
      (first_char >> more_chars).as(:identifier) >> space?
    end

    # An integer followed by either a fractional part or an exponent; the
    # trailing part is captured as :e.
    rule(:float) do
      fraction = str('.') >> match('[0-9]').repeat(1)
      exponent = str('e') >> match('[0-9]').repeat(1)
      (integer >> (fraction | exponent).as(:e)).as(:float) >> space?
    end

    rule(:integer) do
      sign = (str('+') | str('-')).maybe
      (sign >> match("[0-9]").repeat(1)).as(:integer) >> space?
    end

    # A backslash consumes the character that follows it, so an escaped
    # quote does not close the string.
    rule(:string) do
      escaped = str('\\') >> any
      plain = str('"').absent? >> any
      str('"') >> (escaped | plain).repeat.as(:string) >> str('"') >> space?
    end
  end

  # Wraps a Parslet::Transform configured for the MiniLisp parse tree.
  class Transform
    include Parslet

    attr_reader :t

    def initialize
      @t = Parslet::Transform.new

      # To understand these, take a look at what comes out of the parser.
      @t.rule(:identifier => simple(:ident)) { ident.to_sym }
      @t.rule(:string => simple(:str)) { str }
      @t.rule(:integer => simple(:int)) { Integer(int) }
      @t.rule(:float => { :integer => simple(:a), :e => simple(:b) }) { Float(a + b) }
      @t.rule(:exp => subtree(:exp)) { exp }
    end

    # Applies the configured transform to +tree+ and returns the result.
    def do(tree)
      t.apply(tree)
    end
  end
end
|
77
|
+
|
78
|
+
parser = MiniLisp::Parser.new
transform = MiniLisp::Transform.new

# Program text fed to the parser.
program = %Q{
(define test (lambda ()
(begin
(display "something")
(display 1)
(display 3.08))))
}

result = parser.parse_with_debug(program)

# Transform the result
if result
  pp(transform.do(result))
end
|
91
|
+
|
92
|
+
# Thereby reducing it to the earlier problem:
|
93
|
+
# http://github.com/kschiess/toylisp
|
94
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require "parslet"
|
5
|
+
|
6
|
+
# Demonstrates modular parsers, split out over many classes. Please look at
|
7
|
+
# ip_address.rb as well.
|
8
|
+
|
9
|
+
# Mixin that contributes the :a_language rule to whatever includes it.
module ALanguage
  include Parslet

  # Parslet rules are really a special kind of method. Mix them into your
  # classes!
  rule(:a_language) do
    str('aaa')
  end
end
|
16
|
+
|
17
|
+
# Parslet parsers are parslet atoms as well. Create an instance and chain them
|
18
|
+
# to your other rules.
|
19
|
+
#
|
20
|
+
# Stand-alone parser whose root rule matches the literal 'bbb'.
class BLanguage < Parslet::Parser
  root(:blang)

  rule(:blang) do
    str('bbb')
  end
end
|
25
|
+
|
26
|
+
# Parslet atoms are really Ruby values, pass them around.
|
27
|
+
c_language = Parslet.str('ccc')

# Combines three sources of grammar: a rule mixed in from ALanguage, a
# BLanguage parser instance, and an atom handed to the constructor.
class Language < Parslet::Parser
  include ALanguage

  root :root

  # c_language: the atom used inside the `c(...)` alternative.
  def initialize(c_language)
    @c_language = c_language
    super()
  end

  rule(:space) { str(' ').maybe }

  rule(:root) do
    a_form = str('a(') >> a_language >> str(')') >> space
    b_form = str('b(') >> BLanguage.new >> str(')') >> space
    c_form = str('c(') >> @c_language >> str(')') >> space

    a_form | b_form | c_form
  end
end
|
44
|
+
|
45
|
+
# Exercise each of the three alternatives with a fresh parser.
['a(aaa)', 'b(bbb)', 'c(ccc)'].each do |input|
  Language.new(c_language).parse(input)
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
require 'parslet/convenience'
|
5
|
+
|
6
|
+
# This example demonstrates tree error reporting in a real life example.
|
7
|
+
# The parser code has been contributed by John Mettraux.
|
8
|
+
|
9
|
+
# Prints +str+ to standard output with a column ruler on top and a
# two-digit, 1-based line number in front of every line.
def prettify(str)
  # Ruler marking columns 5, 10, 15 and 20 of the text that follows the
  # three-character line-number prefix.
  puts format("%8s%6s%4s%6s", ".", "10", ".", "20")

  str.lines.each_with_index do |text, position|
    puts format("%02d %s", position + 1, text.chomp)
  end
end
|
17
|
+
|
18
|
+
# Grammar for a small block language (`define` / `begin` blocks containing
# resource statements), used to demonstrate error reporting. The root rule
# is :radix.
class Parser < Parslet::Parser

  # commons

  rule(:space) { match('[ \t]').repeat(1) }
  rule(:space?) { space.maybe }

  rule(:newline) { match('[\r\n]') }

  rule(:comment) { str('#') >> match('[^\r\n]').repeat }

  # One or more statement terminators: a newline (optionally preceded by a
  # comment) or a semicolon, each with optional surrounding spaces.
  rule(:line_separator) do
    terminator = (comment.maybe >> newline) | str(';')
    (space? >> terminator >> space?).repeat(1)
  end

  rule(:blank) { line_separator | space }
  rule(:blank?) { blank.maybe }

  rule(:identifier) { match('[a-zA-Z0-9_]').repeat(1) }

  # res_statement

  # One or two '@' characters followed by an identifier.
  rule(:reference) do
    (str('@').repeat(1,2) >> identifier).as(:reference)
  end

  rule(:res_action_or_link) do
    action_name = (identifier >> str('?').maybe).as(:name)
    str('.').as(:dot) >> action_name >> str('()')
  end

  rule(:res_actions) do
    reference.as(:resources) >>
      res_action_or_link.as(:res_action).repeat(0).as(:res_actions)
  end

  rule(:res_statement) do
    field = (str(':') >> identifier.as(:name)).maybe.as(:res_field)
    res_actions >> field
  end

  # expression

  rule(:expression) { res_statement }

  # body

  # At least one block or expression, each preceded by a line separator,
  # followed by a closing line separator.
  rule(:body) do
    statement = line_separator >> (block | expression)
    statement.repeat(1).as(:body) >> line_separator
  end

  # blocks

  rule(:begin_block) do
    prefix = (str('concurrent').as(:type) >> space).maybe.as(:pre)
    prefix >> str('begin').as(:begin) >> body >> str('end')
  end

  rule(:define_block) do
    str('define').as(:define) >> space >>
      identifier.as(:name) >> str('()') >>
      body >>
      str('end')
  end

  rule(:block) { define_block | begin_block }

  # root

  rule(:radix) do
    line_separator.maybe >> block >> line_separator.maybe
  end

  root(:radix)
end
|
103
|
+
|
104
|
+
|
105
|
+
# Input documents run through the parser below.
ds = [
  %{
define f()
@res.name
end
},
  %{
define f()
begin
@res.name
end
end
}
]

ds.each do |document|
  puts('-' * 80)
  prettify(document)

  Parser.new.parse_with_debug(document)
end

puts('-' * 80)
|