parslet 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/HISTORY.txt +14 -1
- data/README +11 -7
- data/example/boolean_algebra.rb +70 -0
- data/example/json.rb +131 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/json.out +5 -0
- data/example/output/sentence.out +1 -0
- data/example/sentence.rb +36 -0
- data/lib/parslet.rb +8 -0
- data/lib/parslet/atoms/base.rb +7 -5
- data/lib/parslet/convenience.rb +2 -0
- data/lib/parslet/slice.rb +32 -76
- data/lib/parslet/source.rb +20 -61
- metadata +9 -5
data/HISTORY.txt
CHANGED
@@ -2,8 +2,21 @@
|
|
2
2
|
|
3
3
|
- prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for
|
4
4
|
the win.
|
5
|
+
|
6
|
+
= 1.3.0 / ???
|
7
|
+
|
8
|
+
Next bigger release, features not clear yet. Probably heredoc-parsing.
|
9
|
+
|
10
|
+
= 1.2.1 / 6Jun2011
|
11
|
+
|
12
|
+
! FIX: Input at the end of a parse raises Parslet::UnconsumedInput. (see
|
13
|
+
issue 18)
|
14
|
+
|
15
|
+
! FIX: Unicode parsing should now work as expected. (see issue 38)
|
16
|
+
|
17
|
+
! FIX: Slice#slice returned wrong bits at times (see issue 36).
|
5
18
|
|
6
|
-
= 1.2.0 /
|
19
|
+
= 1.2.0 / 4Feb2011
|
7
20
|
|
8
21
|
+ Parslet::Parser is now also a grammar atom, it can be composed freely with
|
9
22
|
other atoms. (str('f') >> MiniLispParser.new >> str('b'))
|
data/README
CHANGED
@@ -5,10 +5,10 @@ Parslet makes developing complex parsers easy. It does so by
|
|
5
5
|
* providing the best <b>error reporting</b> possible
|
6
6
|
* <b>not generating</b> reams of code for you to debug
|
7
7
|
|
8
|
-
Parslet takes the long way around to make <b>your job</b> easier. It allows
|
9
|
-
incremental language construction. Often, you start out small,
|
10
|
-
the atoms of your language first; _parslet_ takes pride in making
|
11
|
-
possible.
|
8
|
+
Parslet takes the long way around to make <b>your job</b> easier. It allows
|
9
|
+
for incremental language construction. Often, you start out small,
|
10
|
+
implementing the atoms of your language first; _parslet_ takes pride in making
|
11
|
+
this possible.
|
12
12
|
|
13
13
|
Eager to try this out? Please see the associated web site:
|
14
14
|
http://kschiess.github.com/parslet
|
@@ -41,11 +41,15 @@ SYNOPSIS
|
|
41
41
|
|
42
42
|
COMPATIBILITY
|
43
43
|
|
44
|
-
This library should work with most rubies. I've tested it with MRI 1.8
|
45
|
-
rbx-head, jruby. Please report as a bug if you encounter
|
44
|
+
This library should work with most rubies. I've tested it with MRI 1.8
|
45
|
+
(except 1.8.6), 1.9, rbx-head, jruby. Please report as a bug if you encounter
|
46
|
+
issues.
|
47
|
+
|
48
|
+
Note that due to Ruby 1.8 internals, Unicode parsing is not supported on that
|
49
|
+
version.
|
46
50
|
|
47
51
|
STATUS
|
48
52
|
|
49
|
-
At version 1.1 -
|
53
|
+
At version 1.2.1 - See HISTORY.txt for changes.
|
50
54
|
|
51
55
|
(c) 2010 Kaspar Schiess
|
data/example/boolean_algebra.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require "parslet"
|
4
|
+
require "pp"
|
5
|
+
|
6
|
+
# Parses strings like "var1 and (var2 or var3)" respecting operator precedence
|
7
|
+
# and parentheses. After that transforms the parse tree into an array of
|
8
|
+
# arrays like this:
|
9
|
+
#
|
10
|
+
# [["1", "2"], ["1", "3"]]
|
11
|
+
#
|
12
|
+
# The array represents a DNF (disjunctive normal form). Elements of outer
|
13
|
+
# array are connected with "or" operator, while elements of inner arrays are
|
14
|
+
# joined with "and".
|
15
|
+
#
|
16
|
+
class Parser < Parslet::Parser
|
17
|
+
rule(:space) { match[" "].repeat(1) }
|
18
|
+
rule(:space?) { space.maybe }
|
19
|
+
|
20
|
+
rule(:lparen) { str("(") >> space? }
|
21
|
+
rule(:rparen) { str(")") >> space? }
|
22
|
+
|
23
|
+
rule(:and_operator) { str("and") >> space? }
|
24
|
+
rule(:or_operator) { str("or") >> space? }
|
25
|
+
|
26
|
+
rule(:var) { str("var") >> match["0-9"].repeat(1).as(:var) >> space? }
|
27
|
+
|
28
|
+
# The primary rule deals with parentheses.
|
29
|
+
rule(:primary) { lparen >> or_operation >> rparen | var }
|
30
|
+
|
31
|
+
# Note that following rules are both right-recursive.
|
32
|
+
rule(:and_operation) {
|
33
|
+
(primary.as(:left) >> and_operator >>
|
34
|
+
and_operation.as(:right)).as(:and) |
|
35
|
+
primary }
|
36
|
+
|
37
|
+
rule(:or_operation) {
|
38
|
+
(and_operation.as(:left) >> or_operator >>
|
39
|
+
or_operation.as(:right)).as(:or) |
|
40
|
+
and_operation }
|
41
|
+
|
42
|
+
# We start at the lowest precedence rule.
|
43
|
+
root(:or_operation)
|
44
|
+
end
|
45
|
+
|
46
|
+
class Transformer < Parslet::Transform
|
47
|
+
rule(:var => simple(:var)) { [[String(var)]] }
|
48
|
+
|
49
|
+
rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do
|
50
|
+
(left + right)
|
51
|
+
end
|
52
|
+
|
53
|
+
rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do
|
54
|
+
res = []
|
55
|
+
left.each do |l|
|
56
|
+
right.each do |r|
|
57
|
+
res << (l + r)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
res
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
pp tree = Parser.new.parse("var1 and (var2 or var3)")
|
65
|
+
# {:and=>
|
66
|
+
# {:left=>{:var=>"1"@3},
|
67
|
+
# :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}}
|
68
|
+
pp Transformer.new.apply(tree)
|
69
|
+
# [["1", "2"], ["1", "3"]]
|
70
|
+
|
data/example/json.rb
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
#
|
4
|
+
# MIT License - (c) 2011 John Mettraux
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'parslet' # gem install parslet
|
9
|
+
|
10
|
+
|
11
|
+
module MyJson
|
12
|
+
|
13
|
+
class Parser < Parslet::Parser
|
14
|
+
|
15
|
+
rule(:spaces) { match('\s').repeat(1) }
|
16
|
+
rule(:spaces?) { spaces.maybe }
|
17
|
+
|
18
|
+
rule(:comma) { spaces? >> str(',') >> spaces? }
|
19
|
+
rule(:digit) { match('[0-9]') }
|
20
|
+
|
21
|
+
rule(:number) {
|
22
|
+
(
|
23
|
+
str('-').maybe >> (
|
24
|
+
str('0') | (match('[1-9]') >> digit.repeat)
|
25
|
+
) >> (
|
26
|
+
str('.') >> digit.repeat(1)
|
27
|
+
).maybe >> (
|
28
|
+
match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
|
29
|
+
).maybe
|
30
|
+
).as(:number)
|
31
|
+
}
|
32
|
+
|
33
|
+
rule(:string) {
|
34
|
+
str('"') >> (
|
35
|
+
str('\\') >> any | str('"').absent? >> any
|
36
|
+
).repeat.as(:string) >> str('"')
|
37
|
+
}
|
38
|
+
|
39
|
+
rule(:array) {
|
40
|
+
str('[') >> spaces? >>
|
41
|
+
(value >> (comma >> value).repeat).maybe.as(:array) >>
|
42
|
+
spaces? >> str(']')
|
43
|
+
}
|
44
|
+
|
45
|
+
rule(:object) {
|
46
|
+
str('{') >> spaces? >>
|
47
|
+
(entry >> (comma >> entry).repeat).maybe.as(:object) >>
|
48
|
+
spaces? >> str('}')
|
49
|
+
}
|
50
|
+
|
51
|
+
rule(:value) {
|
52
|
+
string | number |
|
53
|
+
object | array |
|
54
|
+
str('true').as(:true) | str('false').as(:false) |
|
55
|
+
str('null').as(:null)
|
56
|
+
}
|
57
|
+
|
58
|
+
rule(:entry) {
|
59
|
+
(
|
60
|
+
string.as(:key) >> spaces? >>
|
61
|
+
str(':') >> spaces? >>
|
62
|
+
value.as(:val)
|
63
|
+
).as(:entry)
|
64
|
+
}
|
65
|
+
|
66
|
+
rule(:attribute) { (entry | value).as(:attribute) }
|
67
|
+
|
68
|
+
rule(:top) { spaces? >> value >> spaces? }
|
69
|
+
|
70
|
+
root(:top)
|
71
|
+
end
|
72
|
+
|
73
|
+
class Transformer < Parslet::Transform
|
74
|
+
|
75
|
+
class Entry < Struct.new(:key, :val); end
|
76
|
+
|
77
|
+
rule(:array => subtree(:ar)) {
|
78
|
+
ar.is_a?(Array) ? ar : [ ar ]
|
79
|
+
}
|
80
|
+
rule(:object => subtree(:ob)) {
|
81
|
+
(ob.is_a?(Array) ? ob : [ ob ]).inject({}) { |h, e| h[e.key] = e.val; h }
|
82
|
+
}
|
83
|
+
|
84
|
+
rule(:entry => { :key => simple(:ke), :val => simple(:va) }) {
|
85
|
+
Entry.new(ke, va)
|
86
|
+
}
|
87
|
+
|
88
|
+
rule(:string => simple(:st)) {
|
89
|
+
st.to_s
|
90
|
+
}
|
91
|
+
rule(:number => simple(:nb)) {
|
92
|
+
nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
|
93
|
+
}
|
94
|
+
|
95
|
+
rule(:null => simple(:nu)) { nil }
|
96
|
+
rule(:true => simple(:tr)) { true }
|
97
|
+
rule(:false => simple(:fa)) { false }
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.parse(s)
|
101
|
+
|
102
|
+
parser = Parser.new
|
103
|
+
transformer = Transformer.new
|
104
|
+
|
105
|
+
tree = parser.parse(s)
|
106
|
+
puts; p tree; puts
|
107
|
+
out = transformer.apply(tree)
|
108
|
+
|
109
|
+
out
|
110
|
+
|
111
|
+
rescue Parslet::ParseFailed => e
|
112
|
+
puts e, parser.root.error_tree
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
s = %{
|
118
|
+
[ 1, 2, 3, null,
|
119
|
+
"asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
|
120
|
+
0.1e24, true, false, [ 1 ] ]
|
121
|
+
}
|
122
|
+
|
123
|
+
out = MyJson.parse(s)
|
124
|
+
|
125
|
+
p out; puts
|
126
|
+
|
127
|
+
out == [
|
128
|
+
1, 2, 3, nil,
|
129
|
+
"asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
|
130
|
+
0.1e24, true, false, [ 1 ]
|
131
|
+
] || raise("MyJson is a failure")
|
data/example/output/json.out
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
|
2
|
+
{:array=>[{:number=>"1"@5}, {:number=>"2"@8}, {:number=>"3"@11}, {:null=>"null"@14}, {:string=>"asdfasdf asdfds"@25}, {:object=>{:entry=>{:key=>{:string=>"a"@46}, :val=>{:number=>"-1.2"@50}}}}, {:object=>[{:entry=>{:key=>{:string=>"b"@61}, :val=>{:true=>"true"@65}}}, {:entry=>{:key=>{:string=>"c"@72}, :val=>{:false=>"false"@76}}}]}, {:number=>"0.1e24"@89}, {:true=>"true"@97}, {:false=>"false"@103}, {:array=>{:number=>"1"@112}}]}
|
3
|
+
|
4
|
+
[1, 2, 3, nil, "asdfasdf asdfds", {"a"=>-1.2}, {"b"=>true, "c"=>false}, 1.0e+23, true, false, [1]]
|
5
|
+
|
data/example/output/sentence.out
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
["RubyKaigi2009のテーマは、「変わる/変える」です。", " 前回のRubyKaigi2008のテーマであった「多様性」の言葉の通り、 2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 ますます多様化が進みつつあります。", "RubyKaigi2008は、そのような Rubyの生態系をあらためて認識する場となりました。", " しかし、こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、その違いを認識したところであまり意味がありません。", " 異なる実装、異なる思想、異なる背景といった、様々な多様性を理解しつつ、 すり合わせるべきものをすり合わせ、変えていくべきところを 変えていくことが、豊かな未来へとつながる道に違いありません。"]
|
data/example/sentence.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# A small example contributed by John Mettraux (jmettraux) that demonstrates
|
4
|
+
# working with Unicode. This only works on Ruby 1.9.
|
5
|
+
|
6
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
7
|
+
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
class Parser < Parslet::Parser
|
11
|
+
rule(:sentence) { (match('[^。]').repeat(1) >> str("。")).as(:sentence) }
|
12
|
+
rule(:sentences) { sentence.repeat }
|
13
|
+
root(:sentences)
|
14
|
+
end
|
15
|
+
|
16
|
+
class Transformer < Parslet::Transform
|
17
|
+
rule(:sentence => simple(:sen)) { sen.to_s }
|
18
|
+
end
|
19
|
+
|
20
|
+
string =
|
21
|
+
"RubyKaigi2009のテーマは、「変わる/変える」です。 前回の" +
|
22
|
+
"RubyKaigi2008のテーマであった「多様性」の言葉の通り、 " +
|
23
|
+
"2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 " +
|
24
|
+
"ますます多様化が進みつつあります。RubyKaigi2008は、そのような " +
|
25
|
+
"Rubyの生態系をあらためて認識する場となりました。 しかし、" +
|
26
|
+
"こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、" +
|
27
|
+
"その違いを認識したところであまり意味がありません。 異なる実装、" +
|
28
|
+
"異なる思想、異なる背景といった、様々な多様性を理解しつつ、 " +
|
29
|
+
"すり合わせるべきものをすり合わせ、変えていくべきところを " +
|
30
|
+
"変えていくことが、豊かな未来へとつながる道に違いありません。"
|
31
|
+
|
32
|
+
parser = Parser.new
|
33
|
+
transformer = Transformer.new
|
34
|
+
|
35
|
+
tree = parser.parse(string)
|
36
|
+
p transformer.apply(tree)
|
data/lib/parslet.rb
CHANGED
@@ -75,6 +75,14 @@ module Parslet
|
|
75
75
|
class ParseFailed < StandardError
|
76
76
|
end
|
77
77
|
|
78
|
+
# Raised when the parse operation didn't consume all of its input. In this
|
79
|
+
# case, it makes only limited sense to look at the error tree. Maybe the
|
80
|
+
# parser worked just fine, but didn't account for the characters at the tail
|
81
|
+
# of the input?
|
82
|
+
#
|
83
|
+
class UnconsumedInput < ParseFailed
|
84
|
+
end
|
85
|
+
|
78
86
|
module ClassMethods
|
79
87
|
# Define an entity for the parser. This generates a method of the same
|
80
88
|
# name that can be used as part of other patterns. Those methods can be
|
data/lib/parslet/atoms/base.rb
CHANGED
@@ -48,14 +48,16 @@ class Parslet::Atoms::Base
|
|
48
48
|
# error to fail with. Otherwise just report that we cannot consume the
|
49
49
|
# input.
|
50
50
|
if cause
|
51
|
-
#
|
52
|
-
raise
|
51
|
+
# We're not using #parse_failed here, since it assigns to @last_cause.
|
52
|
+
# Still: We'll raise this differently, since the real cause is different.
|
53
|
+
raise Parslet::UnconsumedInput,
|
53
54
|
"Unconsumed input, maybe because of this: #{cause}"
|
54
55
|
else
|
55
56
|
old_pos = source.pos
|
56
57
|
parse_failed(
|
57
58
|
format_cause(source,
|
58
|
-
"Don't know what to do with #{source.read(100)}", old_pos)
|
59
|
+
"Don't know what to do with #{source.read(100)}", old_pos),
|
60
|
+
Parslet::UnconsumedInput)
|
59
61
|
end
|
60
62
|
end
|
61
63
|
|
@@ -246,9 +248,9 @@ private
|
|
246
248
|
# Signals to the outside that the parse has failed. Use this in conjunction
|
247
249
|
# with #format_cause for nice error messages.
|
248
250
|
#
|
249
|
-
def parse_failed(cause)
|
251
|
+
def parse_failed(cause, exception_klass=Parslet::ParseFailed)
|
250
252
|
@last_cause = cause
|
251
|
-
raise
|
253
|
+
raise exception_klass,
|
252
254
|
@last_cause.to_s
|
253
255
|
end
|
254
256
|
|
data/lib/parslet/convenience.rb
CHANGED
data/lib/parslet/slice.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
|
2
2
|
# A slice is a small part from the parse input. A slice mainly behaves like
|
3
3
|
# any other string, except that it remembers where it came from (offset in
|
4
|
-
# original input).
|
4
|
+
# original input).
|
5
5
|
#
|
6
6
|
# Some slices also know what parent slice they are a small part of. This
|
7
7
|
# allows the slice to be concatenated to other slices from the same buffer by
|
8
|
-
# reslicing it against that original buffer.
|
8
|
+
# reslicing it against that original buffer.
|
9
9
|
#
|
10
10
|
# Why the complexity? Slices allow retaining offset information. This will
|
11
11
|
# allow to assign line and column to each small bit of output from the parslet
|
12
12
|
# parser. Also, while we keep that information, we might as well try to do
|
13
13
|
# something useful with it. Reslicing the same buffers should in theory keep
|
14
|
-
# buffer copies and allocations down.
|
14
|
+
# buffer copies and allocations down.
|
15
15
|
#
|
16
16
|
# == Extracting line and column
|
17
17
|
#
|
18
18
|
# Using the #line_and_column method, you can extract the line and column in
|
19
|
-
# the original input where this slice starts.
|
19
|
+
# the original input where this slice starts.
|
20
20
|
#
|
21
|
-
# Example:
|
21
|
+
# Example:
|
22
22
|
# slice.line_and_column # => [1, 13]
|
23
23
|
# slice.offset # => 12
|
24
24
|
#
|
@@ -30,117 +30,73 @@
|
|
30
30
|
# calling #to_s.
|
31
31
|
#
|
32
32
|
# These omissions are somewhat intentional. Rather than maintaining a full
|
33
|
-
# delegation, we opt for a partial emulation that gets the job done.
|
33
|
+
# delegation, we opt for a partial emulation that gets the job done.
|
34
34
|
#
|
35
35
|
# Note also that there are some things that work with strings that will never
|
36
36
|
# work when using slices. For instance, you cannot concatenate slices that
|
37
|
-
# aren't from the same source or that don't join up:
|
37
|
+
# aren't from the same source or that don't join up:
|
38
38
|
#
|
39
|
-
# Example:
|
39
|
+
# Example:
|
40
40
|
# big_slice = 'abcdef'
|
41
41
|
# a = big_slice.slice(0, 2) # => "ab"@0
|
42
42
|
# b = big_slice.slice(4, 2) # => "ef"@4
|
43
|
-
#
|
43
|
+
#
|
44
44
|
# a + b # raises Parslet::InvalidSliceOperation
|
45
45
|
#
|
46
46
|
# This avoids creating slices with impossible offsets or that are
|
47
|
-
# discontinous.
|
47
|
+
# discontinous.
|
48
48
|
#
|
49
49
|
class Parslet::Slice
|
50
50
|
attr_reader :str, :offset
|
51
|
-
attr_reader :parent
|
52
51
|
attr_reader :source
|
53
|
-
|
54
|
-
def initialize(string, offset, source=nil
|
52
|
+
|
53
|
+
def initialize(string, offset, source=nil)
|
55
54
|
@str, @offset = string, offset
|
56
55
|
@source = source
|
57
|
-
@parent = parent
|
58
56
|
end
|
59
|
-
|
60
|
-
# Compares slices to other slices or strings.
|
57
|
+
|
58
|
+
# Compares slices to other slices or strings.
|
61
59
|
#
|
62
60
|
def == other
|
63
61
|
str == other
|
64
62
|
end
|
65
|
-
|
66
|
-
# Match regular expressions.
|
67
|
-
#
|
63
|
+
|
64
|
+
# Match regular expressions.
|
65
|
+
#
|
68
66
|
def match(regexp)
|
69
67
|
str.match(regexp)
|
70
68
|
end
|
71
|
-
|
72
|
-
# Returns
|
73
|
-
# Whenever possible, return parts of the parent buffer that this slice was
|
74
|
-
# cut out of.
|
75
|
-
#
|
76
|
-
def slice(start, length)
|
77
|
-
# NOTE: At a later stage, we might not want to create huge trees of slices.
|
78
|
-
# The fact that the root of the tree creates slices that link to it makes
|
79
|
-
# the tree already rather flat.
|
80
|
-
|
81
|
-
if parent
|
82
|
-
parent.slice(offset - parent.offset, length)
|
83
|
-
else
|
84
|
-
self.class.new(str.slice(start, length), offset+start, source, self)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
# Returns a slice that starts at file offset start and that has length
|
89
|
-
# characters in it.
|
90
|
-
#
|
91
|
-
def abs_slice(start, length)
|
92
|
-
slice(start-offset, length)
|
93
|
-
end
|
94
|
-
|
95
|
-
# True if this slice can satisfy an original input request to the
|
96
|
-
# range ofs, len.
|
69
|
+
|
70
|
+
# Returns the slices size in characters.
|
97
71
|
#
|
98
|
-
def satisfies?(ofs, len)
|
99
|
-
ofs >= offset && (ofs-offset+len-1)<str.size
|
100
|
-
end
|
101
|
-
|
102
72
|
def size
|
103
73
|
str.size
|
104
74
|
end
|
75
|
+
|
76
|
+
# Concatenate two slices; it is assumed that the second slice begins
|
77
|
+
# where the first one ends. The offset of the resulting slice is the same
|
78
|
+
# as the one of this slice.
|
79
|
+
#
|
105
80
|
def +(other)
|
106
|
-
|
107
|
-
"Cannot concat something other than a slice to a slice." \
|
108
|
-
unless other.respond_to?(:to_slice)
|
109
|
-
|
110
|
-
raise Parslet::InvalidSliceOperation,
|
111
|
-
"Cannot join slices that aren't adjacent."+
|
112
|
-
" (#{self.inspect} + #{other.inspect})" \
|
113
|
-
if offset+size != other.offset
|
114
|
-
|
115
|
-
raise Parslet::InvalidSliceOperation, "Not from the same source." \
|
116
|
-
if source != other.source
|
117
|
-
|
118
|
-
# If both slices stem from the same bigger buffer, we can reslice that
|
119
|
-
# buffer to (probably) avoid a buffer copy, as long as the strings are
|
120
|
-
# not modified.
|
121
|
-
if parent && parent == other.parent
|
122
|
-
return parent.abs_slice(offset, size+other.size)
|
123
|
-
end
|
124
|
-
|
125
|
-
self.class.new(str + other.str, offset, source)
|
81
|
+
self.class.new(str + other.to_s, offset, source)
|
126
82
|
end
|
127
|
-
|
128
|
-
# Returns a <line, column> tuple referring to the original input.
|
83
|
+
|
84
|
+
# Returns a <line, column> tuple referring to the original input.
|
129
85
|
#
|
130
86
|
def line_and_column
|
131
87
|
raise ArgumentError, "No source was given, cannot infer line and column." \
|
132
88
|
unless source
|
133
|
-
|
89
|
+
|
134
90
|
source.line_and_column(self.offset)
|
135
91
|
end
|
136
92
|
|
137
|
-
|
93
|
+
|
138
94
|
# Conversion operators -----------------------------------------------------
|
139
95
|
def to_str
|
140
96
|
str
|
141
97
|
end
|
142
98
|
alias to_s to_str
|
143
|
-
|
99
|
+
|
144
100
|
def to_slice
|
145
101
|
self
|
146
102
|
end
|
@@ -156,7 +112,7 @@ class Parslet::Slice
|
|
156
112
|
def to_f
|
157
113
|
str.to_f
|
158
114
|
end
|
159
|
-
|
115
|
+
|
160
116
|
# Inspection & Debugging ---------------------------------------------------
|
161
117
|
|
162
118
|
# Prints the slice as <code>"string"@offset</code>.
|
@@ -165,7 +121,7 @@ class Parslet::Slice
|
|
165
121
|
end
|
166
122
|
end
|
167
123
|
|
168
|
-
# Raised when trying to do an operation on slices that cannot succeed, like
|
124
|
+
# Raised when trying to do an operation on slices that cannot succeed, like
|
169
125
|
# adding non-adjacent slices. See Parslet::Slice.
|
170
126
|
#
|
171
127
|
class Parslet::InvalidSliceOperation < StandardError
|
data/lib/parslet/source.rb
CHANGED
@@ -14,32 +14,25 @@ class Parslet::Source
|
|
14
14
|
end
|
15
15
|
|
16
16
|
@io = io
|
17
|
-
@virtual_position = @io.pos
|
18
|
-
@eof_position = nil
|
19
|
-
|
20
17
|
@line_cache = LineCache.new
|
21
|
-
|
22
|
-
# Stores an array of <offset, buffer> tuples.
|
23
|
-
@slices = []
|
24
18
|
end
|
25
19
|
|
26
20
|
# Reads n chars from the input and returns a Range instance.
|
27
21
|
#
|
28
22
|
def read(n)
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
slice
|
23
|
+
raise ArgumentError, "Cannot read <= 1 characters at a time." \
|
24
|
+
if n < 1
|
25
|
+
read_slice(n)
|
33
26
|
end
|
34
27
|
|
35
28
|
def eof?
|
36
|
-
@
|
29
|
+
@io.eof?
|
37
30
|
end
|
38
31
|
def pos
|
39
|
-
@
|
32
|
+
@io.pos
|
40
33
|
end
|
41
34
|
def pos=(new_pos)
|
42
|
-
@
|
35
|
+
@io.pos = new_pos
|
43
36
|
end
|
44
37
|
|
45
38
|
# Returns a <line, column> tuple for the given position. If no position is
|
@@ -51,59 +44,25 @@ class Parslet::Source
|
|
51
44
|
end
|
52
45
|
|
53
46
|
private
|
54
|
-
# Minimal size of a single read
|
55
|
-
MIN_READ_SIZE = 10 * 1024
|
56
|
-
# Number of slices to keep
|
57
|
-
BUFFER_CACHE_SIZE = 10
|
58
|
-
|
59
|
-
# Reads and returns a piece of the input that contains length chars starting
|
60
|
-
# at offset.
|
61
|
-
#
|
62
|
-
def read_from_cache(offset, length)
|
63
|
-
# Do we already have a buffer that contains the given range?
|
64
|
-
# Return that.
|
65
|
-
slice = @slices.find { |slice|
|
66
|
-
slice.satisfies?(offset, length) }
|
67
|
-
return slice.abs_slice(offset, length) if slice
|
68
|
-
|
69
|
-
# Read a new buffer: Can the demand be satisfied by sequentially reading
|
70
|
-
# from the current position?
|
71
|
-
needed = offset-@io.pos+length
|
72
|
-
if @io.pos <= offset && needed<MIN_READ_SIZE
|
73
|
-
# read the slice
|
74
|
-
slice = read_slice(needed)
|
75
|
-
return slice.abs_slice(offset, length)
|
76
|
-
end
|
77
|
-
|
78
|
-
# Otherwise seek and read enough so that we can satisfy the demand.
|
79
|
-
@io.pos = offset
|
80
|
-
|
81
|
-
slice = read_slice(needed)
|
82
|
-
return slice.abs_slice(offset, length)
|
83
|
-
end
|
84
|
-
|
85
47
|
def read_slice(needed)
|
86
48
|
start = @io.pos
|
87
|
-
|
88
|
-
buf = @io.read(request)
|
89
|
-
|
90
|
-
# remember eof position
|
91
|
-
if !buf || buf.size<request
|
92
|
-
@eof_position = @io.pos
|
93
|
-
end
|
49
|
+
buf = @io.gets(nil, needed)
|
94
50
|
|
95
51
|
# cache line ends
|
96
52
|
@line_cache.scan_for_line_endings(start, buf)
|
97
53
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
54
|
+
Parslet::Slice.new(buf || '', start, self)
|
55
|
+
end
|
56
|
+
|
57
|
+
if RUBY_VERSION !~ /^1.9/
|
58
|
+
def read_slice(needed)
|
59
|
+
start = @io.pos
|
60
|
+
buf = @io.read(needed)
|
61
|
+
|
62
|
+
# cache line ends
|
63
|
+
@line_cache.scan_for_line_endings(start, buf)
|
64
|
+
|
65
|
+
Parslet::Slice.new(buf || '', start, self)
|
66
|
+
end
|
108
67
|
end
|
109
68
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: parslet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.2.0
|
5
|
+
version: 1.2.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Kaspar Schiess
|
@@ -10,8 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
14
|
-
default_executable:
|
13
|
+
date: 2011-06-05 00:00:00 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: blankslate
|
@@ -100,14 +99,17 @@ files:
|
|
100
99
|
- lib/parslet/source.rb
|
101
100
|
- lib/parslet/transform.rb
|
102
101
|
- lib/parslet.rb
|
102
|
+
- example/boolean_algebra.rb
|
103
103
|
- example/comments.rb
|
104
104
|
- example/documentation.rb
|
105
105
|
- example/email_parser.rb
|
106
106
|
- example/empty.rb
|
107
107
|
- example/erb.rb
|
108
108
|
- example/ip_address.rb
|
109
|
+
- example/json.rb
|
109
110
|
- example/local.rb
|
110
111
|
- example/minilisp.rb
|
112
|
+
- example/output/boolean_algebra.out
|
111
113
|
- example/output/comments.out
|
112
114
|
- example/output/documentation.err
|
113
115
|
- example/output/documentation.out
|
@@ -115,21 +117,23 @@ files:
|
|
115
117
|
- example/output/empty.err
|
116
118
|
- example/output/erb.out
|
117
119
|
- example/output/ip_address.out
|
120
|
+
- example/output/json.out
|
118
121
|
- example/output/local.out
|
119
122
|
- example/output/minilisp.out
|
120
123
|
- example/output/parens.out
|
121
124
|
- example/output/readme.out
|
122
125
|
- example/output/seasons.out
|
126
|
+
- example/output/sentence.out
|
123
127
|
- example/output/simple_xml.out
|
124
128
|
- example/output/string_parser.out
|
125
129
|
- example/parens.rb
|
126
130
|
- example/readme.rb
|
127
131
|
- example/seasons.rb
|
132
|
+
- example/sentence.rb
|
128
133
|
- example/simple.lit
|
129
134
|
- example/simple_xml.rb
|
130
135
|
- example/string_parser.rb
|
131
136
|
- example/test.lit
|
132
|
-
has_rdoc: true
|
133
137
|
homepage: http://kschiess.github.com/parslet
|
134
138
|
licenses: []
|
135
139
|
|
@@ -154,7 +158,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
158
|
requirements: []
|
155
159
|
|
156
160
|
rubyforge_project:
|
157
|
-
rubygems_version: 1.5
|
161
|
+
rubygems_version: 1.8.5
|
158
162
|
signing_key:
|
159
163
|
specification_version: 3
|
160
164
|
summary: Parser construction library with great error reporting in Ruby.
|