parslet 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/HISTORY.txt +14 -1
- data/README +11 -7
- data/example/boolean_algebra.rb +70 -0
- data/example/json.rb +131 -0
- data/example/output/boolean_algebra.out +4 -0
- data/example/output/json.out +5 -0
- data/example/output/sentence.out +1 -0
- data/example/sentence.rb +36 -0
- data/lib/parslet.rb +8 -0
- data/lib/parslet/atoms/base.rb +7 -5
- data/lib/parslet/convenience.rb +2 -0
- data/lib/parslet/slice.rb +32 -76
- data/lib/parslet/source.rb +20 -61
- metadata +9 -5
data/HISTORY.txt
CHANGED
@@ -2,8 +2,21 @@
|
|
2
2
|
|
3
3
|
- prsnt? and absnt? are now finally banned into oblivion. Wasting vocals for
|
4
4
|
the win.
|
5
|
+
|
6
|
+
= 1.3.0 / ???
|
7
|
+
|
8
|
+
Next bigger release, features not clear yet. Probably heredoc-parsing.
|
9
|
+
|
10
|
+
= 1.2.1 / 6Jun2011
|
11
|
+
|
12
|
+
! FIX: Unconsumed input at the end of a parse now raises Parslet::UnconsumedInput. (see
|
13
|
+
issue 18)
|
14
|
+
|
15
|
+
! FIX: Unicode parsing should now work as expected. (see issue 38)
|
16
|
+
|
17
|
+
! FIX: Slice#slice returned wrong bits at times (see issue 36).
|
5
18
|
|
6
|
-
= 1.2.0 /
|
19
|
+
= 1.2.0 / 4Feb2011
|
7
20
|
|
8
21
|
+ Parslet::Parser is now also a grammar atom, it can be composed freely with
|
9
22
|
other atoms. (str('f') >> MiniLispParser.new >> str('b'))
|
data/README
CHANGED
@@ -5,10 +5,10 @@ Parslet makes developing complex parsers easy. It does so by
|
|
5
5
|
* providing the best <b>error reporting</b> possible
|
6
6
|
* <b>not generating</b> reams of code for you to debug
|
7
7
|
|
8
|
-
Parslet takes the long way around to make <b>your job</b> easier. It allows
|
9
|
-
incremental language construction. Often, you start out small,
|
10
|
-
the atoms of your language first; _parslet_ takes pride in making
|
11
|
-
possible.
|
8
|
+
Parslet takes the long way around to make <b>your job</b> easier. It allows
|
9
|
+
for incremental language construction. Often, you start out small,
|
10
|
+
implementing the atoms of your language first; _parslet_ takes pride in making
|
11
|
+
this possible.
|
12
12
|
|
13
13
|
Eager to try this out? Please see the associated web site:
|
14
14
|
http://kschiess.github.com/parslet
|
@@ -41,11 +41,15 @@ SYNOPSIS
|
|
41
41
|
|
42
42
|
COMPATIBILITY
|
43
43
|
|
44
|
-
This library should work with most rubies. I've tested it with MRI 1.8
|
45
|
-
rbx-head, jruby. Please report as a bug if you encounter
|
44
|
+
This library should work with most rubies. I've tested it with MRI 1.8
|
45
|
+
(except 1.8.6), 1.9, rbx-head, jruby. Please report as a bug if you encounter
|
46
|
+
issues.
|
47
|
+
|
48
|
+
Note that due to Ruby 1.8 internals, Unicode parsing is not supported on that
|
49
|
+
version.
|
46
50
|
|
47
51
|
STATUS
|
48
52
|
|
49
|
-
At version 1.1 -
|
53
|
+
At version 1.2.1 - See HISTORY.txt for changes.
|
50
54
|
|
51
55
|
(c) 2010 Kaspar Schiess
|
@@ -0,0 +1,70 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require "parslet"
|
4
|
+
require "pp"
|
5
|
+
|
6
|
+
# Parses strings like "var1 and (var2 or var3)" respecting operator precedence
|
7
|
+
# and parentheses. After that transforms the parse tree into an array of
|
8
|
+
# arrays like this:
|
9
|
+
#
|
10
|
+
# [["1", "2"], ["1", "3"]]
|
11
|
+
#
|
12
|
+
# The array represents a DNF (disjunctive normal form). Elements of outer
|
13
|
+
# array are connected with "or" operator, while elements of inner arrays are
|
14
|
+
# joined with "and".
|
15
|
+
#
|
16
|
+
class Parser < Parslet::Parser
|
17
|
+
rule(:space) { match[" "].repeat(1) }
|
18
|
+
rule(:space?) { space.maybe }
|
19
|
+
|
20
|
+
rule(:lparen) { str("(") >> space? }
|
21
|
+
rule(:rparen) { str(")") >> space? }
|
22
|
+
|
23
|
+
rule(:and_operator) { str("and") >> space? }
|
24
|
+
rule(:or_operator) { str("or") >> space? }
|
25
|
+
|
26
|
+
rule(:var) { str("var") >> match["0-9"].repeat(1).as(:var) >> space? }
|
27
|
+
|
28
|
+
# The primary rule deals with parentheses.
|
29
|
+
rule(:primary) { lparen >> or_operation >> rparen | var }
|
30
|
+
|
31
|
+
# Note that the following rules are both right-recursive.
|
32
|
+
rule(:and_operation) {
|
33
|
+
(primary.as(:left) >> and_operator >>
|
34
|
+
and_operation.as(:right)).as(:and) |
|
35
|
+
primary }
|
36
|
+
|
37
|
+
rule(:or_operation) {
|
38
|
+
(and_operation.as(:left) >> or_operator >>
|
39
|
+
or_operation.as(:right)).as(:or) |
|
40
|
+
and_operation }
|
41
|
+
|
42
|
+
# We start at the lowest precedence rule.
|
43
|
+
root(:or_operation)
|
44
|
+
end
|
45
|
+
|
46
|
+
class Transformer < Parslet::Transform
|
47
|
+
rule(:var => simple(:var)) { [[String(var)]] }
|
48
|
+
|
49
|
+
rule(:or => { :left => subtree(:left), :right => subtree(:right) }) do
|
50
|
+
(left + right)
|
51
|
+
end
|
52
|
+
|
53
|
+
rule(:and => { :left => subtree(:left), :right => subtree(:right) }) do
|
54
|
+
res = []
|
55
|
+
left.each do |l|
|
56
|
+
right.each do |r|
|
57
|
+
res << (l + r)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
res
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
pp tree = Parser.new.parse("var1 and (var2 or var3)")
|
65
|
+
# {:and=>
|
66
|
+
# {:left=>{:var=>"1"@3},
|
67
|
+
# :right=>{:or=>{:left=>{:var=>"2"@13}, :right=>{:var=>"3"@21}}}}}
|
68
|
+
pp Transformer.new.apply(tree)
|
69
|
+
# [["1", "2"], ["1", "3"]]
|
70
|
+
|
data/example/json.rb
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
#
|
4
|
+
# MIT License - (c) 2011 John Mettraux
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'rubygems'
|
8
|
+
require 'parslet' # gem install parslet
|
9
|
+
|
10
|
+
|
11
|
+
module MyJson
|
12
|
+
|
13
|
+
class Parser < Parslet::Parser
|
14
|
+
|
15
|
+
rule(:spaces) { match('\s').repeat(1) }
|
16
|
+
rule(:spaces?) { spaces.maybe }
|
17
|
+
|
18
|
+
rule(:comma) { spaces? >> str(',') >> spaces? }
|
19
|
+
rule(:digit) { match('[0-9]') }
|
20
|
+
|
21
|
+
rule(:number) {
|
22
|
+
(
|
23
|
+
str('-').maybe >> (
|
24
|
+
str('0') | (match('[1-9]') >> digit.repeat)
|
25
|
+
) >> (
|
26
|
+
str('.') >> digit.repeat(1)
|
27
|
+
).maybe >> (
|
28
|
+
match('[eE]') >> (str('+') | str('-')).maybe >> digit.repeat(1)
|
29
|
+
).maybe
|
30
|
+
).as(:number)
|
31
|
+
}
|
32
|
+
|
33
|
+
rule(:string) {
|
34
|
+
str('"') >> (
|
35
|
+
str('\\') >> any | str('"').absent? >> any
|
36
|
+
).repeat.as(:string) >> str('"')
|
37
|
+
}
|
38
|
+
|
39
|
+
rule(:array) {
|
40
|
+
str('[') >> spaces? >>
|
41
|
+
(value >> (comma >> value).repeat).maybe.as(:array) >>
|
42
|
+
spaces? >> str(']')
|
43
|
+
}
|
44
|
+
|
45
|
+
rule(:object) {
|
46
|
+
str('{') >> spaces? >>
|
47
|
+
(entry >> (comma >> entry).repeat).maybe.as(:object) >>
|
48
|
+
spaces? >> str('}')
|
49
|
+
}
|
50
|
+
|
51
|
+
rule(:value) {
|
52
|
+
string | number |
|
53
|
+
object | array |
|
54
|
+
str('true').as(:true) | str('false').as(:false) |
|
55
|
+
str('null').as(:null)
|
56
|
+
}
|
57
|
+
|
58
|
+
rule(:entry) {
|
59
|
+
(
|
60
|
+
string.as(:key) >> spaces? >>
|
61
|
+
str(':') >> spaces? >>
|
62
|
+
value.as(:val)
|
63
|
+
).as(:entry)
|
64
|
+
}
|
65
|
+
|
66
|
+
rule(:attribute) { (entry | value).as(:attribute) }
|
67
|
+
|
68
|
+
rule(:top) { spaces? >> value >> spaces? }
|
69
|
+
|
70
|
+
root(:top)
|
71
|
+
end
|
72
|
+
|
73
|
+
class Transformer < Parslet::Transform
|
74
|
+
|
75
|
+
class Entry < Struct.new(:key, :val); end
|
76
|
+
|
77
|
+
rule(:array => subtree(:ar)) {
|
78
|
+
ar.is_a?(Array) ? ar : [ ar ]
|
79
|
+
}
|
80
|
+
rule(:object => subtree(:ob)) {
|
81
|
+
(ob.is_a?(Array) ? ob : [ ob ]).inject({}) { |h, e| h[e.key] = e.val; h }
|
82
|
+
}
|
83
|
+
|
84
|
+
rule(:entry => { :key => simple(:ke), :val => simple(:va) }) {
|
85
|
+
Entry.new(ke, va)
|
86
|
+
}
|
87
|
+
|
88
|
+
rule(:string => simple(:st)) {
|
89
|
+
st.to_s
|
90
|
+
}
|
91
|
+
rule(:number => simple(:nb)) {
|
92
|
+
nb.match(/[eE\.]/) ? Float(nb) : Integer(nb)
|
93
|
+
}
|
94
|
+
|
95
|
+
rule(:null => simple(:nu)) { nil }
|
96
|
+
rule(:true => simple(:tr)) { true }
|
97
|
+
rule(:false => simple(:fa)) { false }
|
98
|
+
end
|
99
|
+
|
100
|
+
def self.parse(s)
|
101
|
+
|
102
|
+
parser = Parser.new
|
103
|
+
transformer = Transformer.new
|
104
|
+
|
105
|
+
tree = parser.parse(s)
|
106
|
+
puts; p tree; puts
|
107
|
+
out = transformer.apply(tree)
|
108
|
+
|
109
|
+
out
|
110
|
+
|
111
|
+
rescue Parslet::ParseFailed => e
|
112
|
+
puts e, parser.root.error_tree
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
|
117
|
+
s = %{
|
118
|
+
[ 1, 2, 3, null,
|
119
|
+
"asdfasdf asdfds", { "a": -1.2 }, { "b": true, "c": false },
|
120
|
+
0.1e24, true, false, [ 1 ] ]
|
121
|
+
}
|
122
|
+
|
123
|
+
out = MyJson.parse(s)
|
124
|
+
|
125
|
+
p out; puts
|
126
|
+
|
127
|
+
out == [
|
128
|
+
1, 2, 3, nil,
|
129
|
+
"asdfasdf asdfds", { "a" => -1.2 }, { "b" => true, "c" => false },
|
130
|
+
0.1e24, true, false, [ 1 ]
|
131
|
+
] || raise("MyJson is a failure")
|
@@ -0,0 +1,5 @@
|
|
1
|
+
|
2
|
+
{:array=>[{:number=>"1"@5}, {:number=>"2"@8}, {:number=>"3"@11}, {:null=>"null"@14}, {:string=>"asdfasdf asdfds"@25}, {:object=>{:entry=>{:key=>{:string=>"a"@46}, :val=>{:number=>"-1.2"@50}}}}, {:object=>[{:entry=>{:key=>{:string=>"b"@61}, :val=>{:true=>"true"@65}}}, {:entry=>{:key=>{:string=>"c"@72}, :val=>{:false=>"false"@76}}}]}, {:number=>"0.1e24"@89}, {:true=>"true"@97}, {:false=>"false"@103}, {:array=>{:number=>"1"@112}}]}
|
3
|
+
|
4
|
+
[1, 2, 3, nil, "asdfasdf asdfds", {"a"=>-1.2}, {"b"=>true, "c"=>false}, 1.0e+23, true, false, [1]]
|
5
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
["RubyKaigi2009のテーマは、「変わる/変える」です。", " 前回のRubyKaigi2008のテーマであった「多様性」の言葉の通り、 2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 ますます多様化が進みつつあります。", "RubyKaigi2008は、そのような Rubyの生態系をあらためて認識する場となりました。", " しかし、こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、その違いを認識したところであまり意味がありません。", " 異なる実装、異なる思想、異なる背景といった、様々な多様性を理解しつつ、 すり合わせるべきものをすり合わせ、変えていくべきところを 変えていくことが、豊かな未来へとつながる道に違いありません。"]
|
data/example/sentence.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
# A small example contributed by John Mettraux (jmettraux) that demonstrates
|
4
|
+
# working with Unicode. This only works on Ruby 1.9.
|
5
|
+
|
6
|
+
$:.unshift File.dirname(__FILE__) + "/../lib"
|
7
|
+
|
8
|
+
require 'parslet'
|
9
|
+
|
10
|
+
class Parser < Parslet::Parser
|
11
|
+
rule(:sentence) { (match('[^。]').repeat(1) >> str("。")).as(:sentence) }
|
12
|
+
rule(:sentences) { sentence.repeat }
|
13
|
+
root(:sentences)
|
14
|
+
end
|
15
|
+
|
16
|
+
class Transformer < Parslet::Transform
|
17
|
+
rule(:sentence => simple(:sen)) { sen.to_s }
|
18
|
+
end
|
19
|
+
|
20
|
+
string =
|
21
|
+
"RubyKaigi2009のテーマは、「変わる/変える」です。 前回の" +
|
22
|
+
"RubyKaigi2008のテーマであった「多様性」の言葉の通り、 " +
|
23
|
+
"2008年はRubyそのものに関しても、またRubyの活躍する舞台に関しても、 " +
|
24
|
+
"ますます多様化が進みつつあります。RubyKaigi2008は、そのような " +
|
25
|
+
"Rubyの生態系をあらためて認識する場となりました。 しかし、" +
|
26
|
+
"こうした多様化が進む中、異なる者同士が単純に距離を 置いたままでは、" +
|
27
|
+
"その違いを認識したところであまり意味がありません。 異なる実装、" +
|
28
|
+
"異なる思想、異なる背景といった、様々な多様性を理解しつつ、 " +
|
29
|
+
"すり合わせるべきものをすり合わせ、変えていくべきところを " +
|
30
|
+
"変えていくことが、豊かな未来へとつながる道に違いありません。"
|
31
|
+
|
32
|
+
parser = Parser.new
|
33
|
+
transformer = Transformer.new
|
34
|
+
|
35
|
+
tree = parser.parse(string)
|
36
|
+
p transformer.apply(tree)
|
data/lib/parslet.rb
CHANGED
@@ -75,6 +75,14 @@ module Parslet
|
|
75
75
|
class ParseFailed < StandardError
|
76
76
|
end
|
77
77
|
|
78
|
+
# Raised when the parse operation didn't consume all of its input. In this
|
79
|
+
# case, it makes only limited sense to look at the error tree. Maybe the
|
80
|
+
# parser worked just fine, but didn't account for the characters at the tail
|
81
|
+
# of the input?
|
82
|
+
#
|
83
|
+
class UnconsumedInput < ParseFailed
|
84
|
+
end
|
85
|
+
|
78
86
|
module ClassMethods
|
79
87
|
# Define an entity for the parser. This generates a method of the same
|
80
88
|
# name that can be used as part of other patterns. Those methods can be
|
data/lib/parslet/atoms/base.rb
CHANGED
@@ -48,14 +48,16 @@ class Parslet::Atoms::Base
|
|
48
48
|
# error to fail with. Otherwise just report that we cannot consume the
|
49
49
|
# input.
|
50
50
|
if cause
|
51
|
-
#
|
52
|
-
raise
|
51
|
+
# We're not using #parse_failed here, since it assigns to @last_cause.
|
52
|
+
# Still: We'll raise this differently, since the real cause is different.
|
53
|
+
raise Parslet::UnconsumedInput,
|
53
54
|
"Unconsumed input, maybe because of this: #{cause}"
|
54
55
|
else
|
55
56
|
old_pos = source.pos
|
56
57
|
parse_failed(
|
57
58
|
format_cause(source,
|
58
|
-
"Don't know what to do with #{source.read(100)}", old_pos)
|
59
|
+
"Don't know what to do with #{source.read(100)}", old_pos),
|
60
|
+
Parslet::UnconsumedInput)
|
59
61
|
end
|
60
62
|
end
|
61
63
|
|
@@ -246,9 +248,9 @@ private
|
|
246
248
|
# Signals to the outside that the parse has failed. Use this in conjunction
|
247
249
|
# with #format_cause for nice error messages.
|
248
250
|
#
|
249
|
-
def parse_failed(cause)
|
251
|
+
def parse_failed(cause, exception_klass=Parslet::ParseFailed)
|
250
252
|
@last_cause = cause
|
251
|
-
raise
|
253
|
+
raise exception_klass,
|
252
254
|
@last_cause.to_s
|
253
255
|
end
|
254
256
|
|
data/lib/parslet/convenience.rb
CHANGED
data/lib/parslet/slice.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
|
2
2
|
# A slice is a small part from the parse input. A slice mainly behaves like
|
3
3
|
# any other string, except that it remembers where it came from (offset in
|
4
|
-
# original input).
|
4
|
+
# original input).
|
5
5
|
#
|
6
6
|
# Some slices also know what parent slice they are a small part of. This
|
7
7
|
# allows the slice to be concatenated to other slices from the same buffer by
|
8
|
-
# reslicing it against that original buffer.
|
8
|
+
# reslicing it against that original buffer.
|
9
9
|
#
|
10
10
|
# Why the complexity? Slices allow retaining offset information. This will
|
11
11
|
# allow to assign line and column to each small bit of output from the parslet
|
12
12
|
# parser. Also, while we keep that information, we might as well try to do
|
13
13
|
# something useful with it. Reslicing the same buffers should in theory keep
|
14
|
-
# buffer copies and allocations down.
|
14
|
+
# buffer copies and allocations down.
|
15
15
|
#
|
16
16
|
# == Extracting line and column
|
17
17
|
#
|
18
18
|
# Using the #line_and_column method, you can extract the line and column in
|
19
|
-
# the original input where this slice starts.
|
19
|
+
# the original input where this slice starts.
|
20
20
|
#
|
21
|
-
# Example:
|
21
|
+
# Example:
|
22
22
|
# slice.line_and_column # => [1, 13]
|
23
23
|
# slice.offset # => 12
|
24
24
|
#
|
@@ -30,117 +30,73 @@
|
|
30
30
|
# calling #to_s.
|
31
31
|
#
|
32
32
|
# These omissions are somewhat intentional. Rather than maintaining a full
|
33
|
-
# delegation, we opt for a partial emulation that gets the job done.
|
33
|
+
# delegation, we opt for a partial emulation that gets the job done.
|
34
34
|
#
|
35
35
|
# Note also that there are some things that work with strings that will never
|
36
36
|
# work when using slices. For instance, you cannot concatenate slices that
|
37
|
-
# aren't from the same source or that don't join up:
|
37
|
+
# aren't from the same source or that don't join up:
|
38
38
|
#
|
39
|
-
# Example:
|
39
|
+
# Example:
|
40
40
|
# big_slice = 'abcdef'
|
41
41
|
# a = big_slice.slice(0, 2) # => "ab"@0
|
42
42
|
# b = big_slice.slice(4, 2) # => "ef"@4
|
43
|
-
#
|
43
|
+
#
|
44
44
|
# a + b # raises Parslet::InvalidSliceOperation
|
45
45
|
#
|
46
46
|
# This avoids creating slices with impossible offsets or that are
|
47
|
-
# discontinous.
|
47
|
+
# discontinuous.
|
48
48
|
#
|
49
49
|
class Parslet::Slice
|
50
50
|
attr_reader :str, :offset
|
51
|
-
attr_reader :parent
|
52
51
|
attr_reader :source
|
53
|
-
|
54
|
-
def initialize(string, offset, source=nil
|
52
|
+
|
53
|
+
def initialize(string, offset, source=nil)
|
55
54
|
@str, @offset = string, offset
|
56
55
|
@source = source
|
57
|
-
@parent = parent
|
58
56
|
end
|
59
|
-
|
60
|
-
# Compares slices to other slices or strings.
|
57
|
+
|
58
|
+
# Compares slices to other slices or strings.
|
61
59
|
#
|
62
60
|
def == other
|
63
61
|
str == other
|
64
62
|
end
|
65
|
-
|
66
|
-
# Match regular expressions.
|
67
|
-
#
|
63
|
+
|
64
|
+
# Match regular expressions.
|
65
|
+
#
|
68
66
|
def match(regexp)
|
69
67
|
str.match(regexp)
|
70
68
|
end
|
71
|
-
|
72
|
-
# Returns
|
73
|
-
# Whenever possible, return parts of the parent buffer that this slice was
|
74
|
-
# cut out of.
|
75
|
-
#
|
76
|
-
def slice(start, length)
|
77
|
-
# NOTE: At a later stage, we might not want to create huge trees of slices.
|
78
|
-
# The fact that the root of the tree creates slices that link to it makes
|
79
|
-
# the tree already rather flat.
|
80
|
-
|
81
|
-
if parent
|
82
|
-
parent.slice(offset - parent.offset, length)
|
83
|
-
else
|
84
|
-
self.class.new(str.slice(start, length), offset+start, source, self)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
# Returns a slice that starts at file offset start and that has length
|
89
|
-
# characters in it.
|
90
|
-
#
|
91
|
-
def abs_slice(start, length)
|
92
|
-
slice(start-offset, length)
|
93
|
-
end
|
94
|
-
|
95
|
-
# True if this slice can satisfy an original input request to the
|
96
|
-
# range ofs, len.
|
69
|
+
|
70
|
+
# Returns the slice's size in characters.
|
97
71
|
#
|
98
|
-
def satisfies?(ofs, len)
|
99
|
-
ofs >= offset && (ofs-offset+len-1)<str.size
|
100
|
-
end
|
101
|
-
|
102
72
|
def size
|
103
73
|
str.size
|
104
74
|
end
|
75
|
+
|
76
|
+
# Concatenate two slices; it is assumed that the second slice begins
|
77
|
+
# where the first one ends. The offset of the resulting slice is the same
|
78
|
+
# as the one of this slice.
|
79
|
+
#
|
105
80
|
def +(other)
|
106
|
-
|
107
|
-
"Cannot concat something other than a slice to a slice." \
|
108
|
-
unless other.respond_to?(:to_slice)
|
109
|
-
|
110
|
-
raise Parslet::InvalidSliceOperation,
|
111
|
-
"Cannot join slices that aren't adjacent."+
|
112
|
-
" (#{self.inspect} + #{other.inspect})" \
|
113
|
-
if offset+size != other.offset
|
114
|
-
|
115
|
-
raise Parslet::InvalidSliceOperation, "Not from the same source." \
|
116
|
-
if source != other.source
|
117
|
-
|
118
|
-
# If both slices stem from the same bigger buffer, we can reslice that
|
119
|
-
# buffer to (probably) avoid a buffer copy, as long as the strings are
|
120
|
-
# not modified.
|
121
|
-
if parent && parent == other.parent
|
122
|
-
return parent.abs_slice(offset, size+other.size)
|
123
|
-
end
|
124
|
-
|
125
|
-
self.class.new(str + other.str, offset, source)
|
81
|
+
self.class.new(str + other.to_s, offset, source)
|
126
82
|
end
|
127
|
-
|
128
|
-
# Returns a <line, column> tuple referring to the original input.
|
83
|
+
|
84
|
+
# Returns a <line, column> tuple referring to the original input.
|
129
85
|
#
|
130
86
|
def line_and_column
|
131
87
|
raise ArgumentError, "No source was given, cannot infer line and column." \
|
132
88
|
unless source
|
133
|
-
|
89
|
+
|
134
90
|
source.line_and_column(self.offset)
|
135
91
|
end
|
136
92
|
|
137
|
-
|
93
|
+
|
138
94
|
# Conversion operators -----------------------------------------------------
|
139
95
|
def to_str
|
140
96
|
str
|
141
97
|
end
|
142
98
|
alias to_s to_str
|
143
|
-
|
99
|
+
|
144
100
|
def to_slice
|
145
101
|
self
|
146
102
|
end
|
@@ -156,7 +112,7 @@ class Parslet::Slice
|
|
156
112
|
def to_f
|
157
113
|
str.to_f
|
158
114
|
end
|
159
|
-
|
115
|
+
|
160
116
|
# Inspection & Debugging ---------------------------------------------------
|
161
117
|
|
162
118
|
# Prints the slice as <code>"string"@offset</code>.
|
@@ -165,7 +121,7 @@ class Parslet::Slice
|
|
165
121
|
end
|
166
122
|
end
|
167
123
|
|
168
|
-
# Raised when trying to do an operation on slices that cannot succeed, like
|
124
|
+
# Raised when trying to do an operation on slices that cannot succeed, like
|
169
125
|
# adding non-adjacent slices. See Parslet::Slice.
|
170
126
|
#
|
171
127
|
class Parslet::InvalidSliceOperation < StandardError
|
data/lib/parslet/source.rb
CHANGED
@@ -14,32 +14,25 @@ class Parslet::Source
|
|
14
14
|
end
|
15
15
|
|
16
16
|
@io = io
|
17
|
-
@virtual_position = @io.pos
|
18
|
-
@eof_position = nil
|
19
|
-
|
20
17
|
@line_cache = LineCache.new
|
21
|
-
|
22
|
-
# Stores an array of <offset, buffer> tuples.
|
23
|
-
@slices = []
|
24
18
|
end
|
25
19
|
|
26
20
|
# Reads n chars from the input and returns a Parslet::Slice instance.
|
27
21
|
#
|
28
22
|
def read(n)
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
slice
|
23
|
+
raise ArgumentError, "Cannot read <= 1 characters at a time." \
|
24
|
+
if n < 1
|
25
|
+
read_slice(n)
|
33
26
|
end
|
34
27
|
|
35
28
|
def eof?
|
36
|
-
@
|
29
|
+
@io.eof?
|
37
30
|
end
|
38
31
|
def pos
|
39
|
-
@
|
32
|
+
@io.pos
|
40
33
|
end
|
41
34
|
def pos=(new_pos)
|
42
|
-
@
|
35
|
+
@io.pos = new_pos
|
43
36
|
end
|
44
37
|
|
45
38
|
# Returns a <line, column> tuple for the given position. If no position is
|
@@ -51,59 +44,25 @@ class Parslet::Source
|
|
51
44
|
end
|
52
45
|
|
53
46
|
private
|
54
|
-
# Minimal size of a single read
|
55
|
-
MIN_READ_SIZE = 10 * 1024
|
56
|
-
# Number of slices to keep
|
57
|
-
BUFFER_CACHE_SIZE = 10
|
58
|
-
|
59
|
-
# Reads and returns a piece of the input that contains length chars starting
|
60
|
-
# at offset.
|
61
|
-
#
|
62
|
-
def read_from_cache(offset, length)
|
63
|
-
# Do we already have a buffer that contains the given range?
|
64
|
-
# Return that.
|
65
|
-
slice = @slices.find { |slice|
|
66
|
-
slice.satisfies?(offset, length) }
|
67
|
-
return slice.abs_slice(offset, length) if slice
|
68
|
-
|
69
|
-
# Read a new buffer: Can the demand be satisfied by sequentially reading
|
70
|
-
# from the current position?
|
71
|
-
needed = offset-@io.pos+length
|
72
|
-
if @io.pos <= offset && needed<MIN_READ_SIZE
|
73
|
-
# read the slice
|
74
|
-
slice = read_slice(needed)
|
75
|
-
return slice.abs_slice(offset, length)
|
76
|
-
end
|
77
|
-
|
78
|
-
# Otherwise seek and read enough so that we can satisfy the demand.
|
79
|
-
@io.pos = offset
|
80
|
-
|
81
|
-
slice = read_slice(needed)
|
82
|
-
return slice.abs_slice(offset, length)
|
83
|
-
end
|
84
|
-
|
85
47
|
def read_slice(needed)
|
86
48
|
start = @io.pos
|
87
|
-
|
88
|
-
buf = @io.read(request)
|
89
|
-
|
90
|
-
# remember eof position
|
91
|
-
if !buf || buf.size<request
|
92
|
-
@eof_position = @io.pos
|
93
|
-
end
|
49
|
+
buf = @io.gets(nil, needed)
|
94
50
|
|
95
51
|
# cache line ends
|
96
52
|
@line_cache.scan_for_line_endings(start, buf)
|
97
53
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
54
|
+
Parslet::Slice.new(buf || '', start, self)
|
55
|
+
end
|
56
|
+
|
57
|
+
if RUBY_VERSION !~ /^1.9/
|
58
|
+
def read_slice(needed)
|
59
|
+
start = @io.pos
|
60
|
+
buf = @io.read(needed)
|
61
|
+
|
62
|
+
# cache line ends
|
63
|
+
@line_cache.scan_for_line_endings(start, buf)
|
64
|
+
|
65
|
+
Parslet::Slice.new(buf || '', start, self)
|
66
|
+
end
|
108
67
|
end
|
109
68
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: parslet
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 1.2.
|
5
|
+
version: 1.2.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Kaspar Schiess
|
@@ -10,8 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
14
|
-
default_executable:
|
13
|
+
date: 2011-06-05 00:00:00 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: blankslate
|
@@ -100,14 +99,17 @@ files:
|
|
100
99
|
- lib/parslet/source.rb
|
101
100
|
- lib/parslet/transform.rb
|
102
101
|
- lib/parslet.rb
|
102
|
+
- example/boolean_algebra.rb
|
103
103
|
- example/comments.rb
|
104
104
|
- example/documentation.rb
|
105
105
|
- example/email_parser.rb
|
106
106
|
- example/empty.rb
|
107
107
|
- example/erb.rb
|
108
108
|
- example/ip_address.rb
|
109
|
+
- example/json.rb
|
109
110
|
- example/local.rb
|
110
111
|
- example/minilisp.rb
|
112
|
+
- example/output/boolean_algebra.out
|
111
113
|
- example/output/comments.out
|
112
114
|
- example/output/documentation.err
|
113
115
|
- example/output/documentation.out
|
@@ -115,21 +117,23 @@ files:
|
|
115
117
|
- example/output/empty.err
|
116
118
|
- example/output/erb.out
|
117
119
|
- example/output/ip_address.out
|
120
|
+
- example/output/json.out
|
118
121
|
- example/output/local.out
|
119
122
|
- example/output/minilisp.out
|
120
123
|
- example/output/parens.out
|
121
124
|
- example/output/readme.out
|
122
125
|
- example/output/seasons.out
|
126
|
+
- example/output/sentence.out
|
123
127
|
- example/output/simple_xml.out
|
124
128
|
- example/output/string_parser.out
|
125
129
|
- example/parens.rb
|
126
130
|
- example/readme.rb
|
127
131
|
- example/seasons.rb
|
132
|
+
- example/sentence.rb
|
128
133
|
- example/simple.lit
|
129
134
|
- example/simple_xml.rb
|
130
135
|
- example/string_parser.rb
|
131
136
|
- example/test.lit
|
132
|
-
has_rdoc: true
|
133
137
|
homepage: http://kschiess.github.com/parslet
|
134
138
|
licenses: []
|
135
139
|
|
@@ -154,7 +158,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
158
|
requirements: []
|
155
159
|
|
156
160
|
rubyforge_project:
|
157
|
-
rubygems_version: 1.5
|
161
|
+
rubygems_version: 1.8.5
|
158
162
|
signing_key:
|
159
163
|
specification_version: 3
|
160
164
|
summary: Parser construction library with great error reporting in Ruby.
|