parslet 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
@@ -0,0 +1,172 @@
1
+
2
+ # A slice is a small part from the parse input. A slice mainly behaves like
3
+ # any other string, except that it remembers where it came from (offset in
4
+ # original input).
5
+ #
6
+ # Some slices also know what parent slice they are a small part of. This
7
+ # allows the slice to be concatenated to other slices from the same buffer by
8
+ # reslicing it against that original buffer.
9
+ #
10
+ # Why the complexity? Slices allow retaining offset information. This will
11
+ # allow to assign line and column to each small bit of output from the parslet
12
+ # parser. Also, while we keep that information, we might as well try to do
13
+ # something useful with it. Reslicing the same buffers should in theory keep
14
+ # buffer copies and allocations down.
15
+ #
16
+ # == Extracting line and column
17
+ #
18
+ # Using the #line_and_column method, you can extract the line and column in
19
+ # the original input where this slice starts.
20
+ #
21
+ # Example:
22
+ # slice.line_and_column # => [1, 13]
23
+ # slice.offset # => 12
24
+ #
25
+ # == Likeness to strings
26
+ #
27
+ # Parslet::Slice behaves in many ways like a Ruby String. This likeness
28
+ # however is not complete - many of the myriad of operations String supports
29
+ # are not yet in Slice. You can always extract the internal string instance by
30
+ # calling #to_s.
31
+ #
32
+ # These omissions are somewhat intentional. Rather than maintaining a full
33
+ # delegation, we opt for a partial emulation that gets the job done.
34
+ #
35
+ # Note also that there are some things that work with strings that will never
36
+ # work when using slices. For instance, you cannot concatenate slices that
37
+ # aren't from the same source or that don't join up:
38
+ #
39
+ # Example:
40
+ # big_slice = Parslet::Slice.new('abcdef', 0)
41
+ # a = big_slice.slice(0, 2) # => "ab"@0
42
+ # b = big_slice.slice(4, 2) # => "ef"@4
43
+ #
44
+ # a + b # raises Parslet::InvalidSliceOperation
45
+ #
46
+ # This avoids creating slices with impossible offsets or that are
47
+ # discontinuous.
48
+ #
49
class Parslet::Slice
  attr_reader :str, :offset
  attr_reader :parent
  attr_reader :source

  # Creates a slice holding +string+, which starts at position +offset+ of the
  # original input. +source+ (optional) is the object that is asked for
  # line/column information; +parent+ (optional) is the bigger slice this one
  # was cut out of.
  #
  def initialize(string, offset, source=nil, parent=nil)
    @str, @offset = string, offset
    @source = source
    @parent = parent
  end

  # Compares slices to other slices or strings. Only the string content is
  # compared; offset, source and parent are ignored.
  #
  def == other
    str == other
  end

  # Match regular expressions against the string content.
  #
  def match(regexp)
    str.match(regexp)
  end

  # Returns a slice that starts at offset start (relative to the beginning of
  # this slice) and that has length characters. Whenever possible, return
  # parts of the parent buffer that this slice was cut out of.
  #
  def slice(start, length)
    # NOTE: At a later stage, we might not want to create huge trees of slices.
    # The fact that the root of the tree creates slices that link to it makes
    # the tree already rather flat.

    if parent
      # Translate start into the parent's coordinate system: this slice
      # begins (offset - parent.offset) characters into the parent.
      parent.slice(start + offset - parent.offset, length)
    else
      self.class.new(str.slice(start, length), offset+start, source, self)
    end
  end

  # Returns a slice that starts at file offset start and that has length
  # characters in it.
  #
  def abs_slice(start, length)
    slice(start-offset, length)
  end

  # True if this slice can satisfy an original input request to the
  # range ofs, len.
  #
  def satisfies?(ofs, len)
    ofs >= offset && (ofs-offset+len-1)<str.size
  end

  # Number of characters in this slice.
  #
  def size
    str.size
  end

  # Joins this slice with the slice that directly follows it in the input.
  #
  # Raises ArgumentError if other does not respond to #to_slice and
  # Parslet::InvalidSliceOperation if the slices are not adjacent or do not
  # share the same source.
  #
  def +(other)
    raise ArgumentError,
      "Cannot concat something other than a slice to a slice." \
        unless other.respond_to?(:to_slice)

    raise Parslet::InvalidSliceOperation,
      "Cannot join slices that aren't adjacent."+
      " (#{self.inspect} + #{other.inspect})" \
        if offset+size != other.offset

    raise Parslet::InvalidSliceOperation, "Not from the same source." \
      if source != other.source

    # If both slices stem from the same bigger buffer, we can reslice that
    # buffer to (probably) avoid a buffer copy, as long as the strings are
    # not modified. The parents are compared by identity: Slice#== compares
    # string content, which would both be slow and mistake two distinct
    # buffers with equal text for the same buffer.
    if parent && parent.equal?(other.parent)
      return parent.abs_slice(offset, size+other.size)
    end

    self.class.new(str + other.str, offset, source)
  end

  # Returns a <line, column> tuple referring to the original input.
  #
  # Raises ArgumentError if the slice was created without a source.
  #
  def line_and_column
    raise ArgumentError, "No source was given, cannot infer line and column." \
      unless source

    source.line_and_column(self.offset)
  end


  # Conversion operators -----------------------------------------------------

  # Returns the string content of this slice.
  def to_str
    str
  end
  alias to_s to_str

  # A slice is its own slice representation.
  def to_slice
    self
  end

  # String content as a symbol.
  def to_sym
    str.to_sym
  end

  # Strict integer conversion via Kernel#Integer.
  def to_int
    Integer(str)
  end

  # Lenient integer conversion via String#to_i.
  def to_i
    str.to_i
  end

  # Lenient float conversion via String#to_f.
  def to_f
    str.to_f
  end

  # Inspection & Debugging ---------------------------------------------------

  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    str.inspect << "@#{offset}"
  end
end
167
+
168
+ # Raised when trying to do an operation on slices that cannot succeed, like
169
+ # adding non-adjacent slices or slices from different sources. See Parslet::Slice.
170
+ #
171
+ class Parslet::InvalidSliceOperation < StandardError
172
+ end
@@ -1,120 +1,109 @@
1
1
 
2
2
  require 'stringio'
3
3
 
4
+ require 'parslet/source/line_cache'
5
+
4
6
  # Wraps the input IO to parslet. The interface defined by this class is
5
7
  # smaller than what IO offers, but enhances it with a #line_and_column
6
8
  # method for the current position.
7
9
  #
8
10
  class Parslet::Source
9
- attr_reader :line_ends
10
-
11
11
  def initialize(io)
12
12
  if io.respond_to? :to_str
13
13
  io = StringIO.new(io)
14
14
  end
15
15
 
16
16
  @io = io
17
- warn "Line counting will be off if the IO is not rewound." unless @io.pos==0
17
+ @virtual_position = @io.pos
18
+ @eof_position = nil
18
19
 
19
- # Stores line endings as a simple position number. The first line always
20
- # starts at 0; numbers beyond the biggest entry are on any line > size,
21
- # but probably make a scan to that position neccessary.
22
- @line_ends = []
23
- @line_ends.extend RangeSearch
20
+ @line_cache = LineCache.new
21
+
22
+ # Stores an array of <offset, buffer> tuples.
23
+ @slices = []
24
24
  end
25
25
 
26
+ # Reads n chars from the input and returns a Range instance.
27
+ #
26
28
  def read(n)
27
- start_pos = pos
28
- @io.read(n).tap { |buf| scan_for_line_endings(start_pos, buf) }
29
+ slice = read_from_cache(@virtual_position, n)
30
+ @virtual_position += slice.size
31
+
32
+ slice
29
33
  end
30
34
 
31
35
  def eof?
32
- @io.eof?
36
+ @eof_position && @virtual_position >= @eof_position
33
37
  end
34
-
35
38
  def pos
36
- @io.pos
39
+ @virtual_position
37
40
  end
38
-
39
- # NOTE: If you seek beyond the point that you last read, you will get
40
- # undefined behaviour. This is by design.
41
41
  def pos=(new_pos)
42
- @io.pos = new_pos
43
- end
44
-
45
- def line_and_column(position=nil)
46
- pos = (position || self.pos)
47
- eol_idx = @line_ends.lbound(pos)
48
-
49
- if eol_idx
50
- # eol_idx points to the offset that ends the current line.
51
- # Let's try to find the offset that starts it:
52
- offset = eol_idx>0 && @line_ends[eol_idx-1] || 0
53
- return [eol_idx+1, pos-offset+1]
54
- else
55
- # eol_idx is nil, that means that we're beyond the last line end that
56
- # we know about. Pretend for now that we're just on the last line.
57
- offset = @line_ends.last || 0
58
- return [@line_ends.size+1, pos-offset+1]
59
- end
42
+ @virtual_position = new_pos
60
43
  end
61
44
 
62
- # Mixin for arrays that implicitly give a number of ranges, where one range
63
- # begins where the other one ends.
64
- #
65
- # Example:
66
- #
67
- # [10, 20, 30]
68
- # # would describe [0, 10], (10, 20], (20, 30]
45
+ # Returns a <line, column> tuple for the given position. If no position is
46
+ # given, line/column information is returned for the current position given
47
+ # by #pos.
69
48
  #
70
- module RangeSearch
71
- # Scans the array for the first number that is > than bound. Returns the
72
- # index of that number.
73
- #
74
- def lbound(bound)
75
- return nil if empty?
76
- return nil unless last > bound
77
-
78
- left = 0
79
- right = size - 1
80
-
81
- n = 10
82
- loop do
83
- mid = left + (right - left) / 2
84
-
85
- if self[mid] > bound
86
- right = mid
87
- else
88
- # assert: self[mid] <= bound
89
- left = mid+1
90
- end
91
-
92
- if right <= left
93
- return right
94
- end
95
- end
96
- end
49
+ def line_and_column(position=nil)
50
+ @line_cache.line_and_column(position || self.pos)
97
51
  end
98
52
 
99
53
  private
54
+ # Minimal size of a single read
55
+ MIN_READ_SIZE = 10 * 1024
56
+ # Number of slices to keep
57
+ BUFFER_CACHE_SIZE = 10
58
+
59
+ # Reads and returns a piece of the input that contains length chars starting
60
+ # at offset.
61
+ #
62
+ def read_from_cache(offset, length)
63
+ # Do we already have a buffer that contains the given range?
64
+ # Return that.
65
+ slice = @slices.find { |slice|
66
+ slice.satisfies?(offset, length) }
67
+ return slice.abs_slice(offset, length) if slice
68
+
69
+ # Read a new buffer: Can the demand be satisfied by sequentially reading
70
+ # from the current position?
71
+ needed = offset-@io.pos+length
72
+ if @io.pos <= offset && needed<MIN_READ_SIZE
73
+ # read the slice
74
+ slice = read_slice(needed)
75
+ return slice.abs_slice(offset, length)
76
+ end
77
+
78
+ # Otherwise seek and read enough so that we can satisfy the demand.
79
+ @io.pos = offset
100
80
 
101
- def scan_for_line_endings(start_pos, buf)
102
- return unless buf
103
- return unless buf.index("\n")
104
- cur = -1
81
+ slice = read_slice(needed)
82
+ return slice.abs_slice(offset, length)
83
+ end
105
84
 
106
- # If we have already read part or all of buf, we already know about
107
- # line ends in that portion. remove it and correct cur (search index)
108
- if @last_line_end && start_pos < @last_line_end
109
- # Let's not search the range from start_pos to last_line_end again.
110
- cur = @last_line_end - start_pos -1
85
+ def read_slice(needed)
86
+ start = @io.pos
87
+ request = [MIN_READ_SIZE, needed].max
88
+ buf = @io.read(request)
89
+
90
+ # remember eof position
91
+ if !buf || buf.size<request
92
+ @eof_position = @io.pos
111
93
  end
94
+
95
+ # cache line ends
96
+ @line_cache.scan_for_line_endings(start, buf)
97
+
98
+ slice = Parslet::Slice.new(buf || '', start, self)
99
+
100
+ # Don't cache empty slices.
101
+ return slice unless buf
102
+
103
+ # cache the buffer (and eject old entries)
104
+ @slices << slice
105
+ @slices.shift if @slices.size > BUFFER_CACHE_SIZE
112
106
 
113
- # Scan the string for line endings; store the positions of all endings
114
- # in @line_ends.
115
- while buf && cur = buf.index("\n", cur+1)
116
- @last_line_end = (start_pos + cur+1)
117
- @line_ends << @last_line_end
118
- end
107
+ slice
119
108
  end
120
109
  end
@@ -0,0 +1,90 @@
1
+
2
+
3
class Parslet::Source
  # A cache for line start positions.
  #
  class LineCache # :nodoc:
    def initialize
      # Stores line endings as a simple position number. The first line always
      # starts at 0; numbers beyond the biggest entry are on any line > size,
      # but probably make a scan to that position necessary.
      @line_ends = []
      @line_ends.extend RangeSearch
    end

    # Returns a <line, column> tuple for the given input position. Both
    # numbers are 1-based.
    #
    def line_and_column(pos)
      eol_idx = @line_ends.lbound(pos)

      if eol_idx
        # eol_idx points to the offset that ends the current line.
        # Let's try to find the offset that starts it:
        offset = eol_idx > 0 ? @line_ends[eol_idx-1] : 0
        [eol_idx+1, pos-offset+1]
      else
        # eol_idx is nil, that means that we're beyond the last line end that
        # we know about. Pretend for now that we're just on the last line.
        offset = @line_ends.last || 0
        [@line_ends.size+1, pos-offset+1]
      end
    end

    # Records the position following every "\n" in buf, where buf is the
    # piece of input that starts at start_pos. Portions that lie before the
    # last recorded line end are skipped. Does nothing when buf is nil.
    #
    def scan_for_line_endings(start_pos, buf)
      return unless buf

      cur = -1

      # If we have already read part or all of buf, we already know about
      # line ends in that portion. remove it and correct cur (search index)
      if @last_line_end && start_pos < @last_line_end
        # Let's not search the range from start_pos to last_line_end again.
        cur = @last_line_end - start_pos - 1
      end

      # Scan the string for line endings; store the positions of all endings
      # in @line_ends.
      while cur = buf.index("\n", cur+1)
        @last_line_end = start_pos + cur + 1
        @line_ends << @last_line_end
      end
    end
  end

  # Mixin for arrays that implicitly give a number of ranges, where one range
  # begins where the other one ends.
  #
  # Example:
  #
  #   [10, 20, 30]
  #   # would describe [0, 10], (10, 20], (20, 30]
  #
  module RangeSearch # :nodoc:
    # Scans the array for the first number that is > than bound. Returns the
    # index of that number, or nil if there is no such number. The array is
    # searched by bisection, so it must be sorted in ascending order.
    #
    def lbound(bound)
      return nil if empty?
      return nil unless last > bound

      left = 0
      right = size - 1

      # Invariant: self[right] > bound, so the answer is never right of right.
      while left < right
        mid = left + (right - left) / 2

        if self[mid] > bound
          right = mid
        else
          # assert: self[mid] <= bound
          left = mid+1
        end
      end

      right
    end
  end
end