parslet 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/HISTORY.txt +29 -0
  2. data/README +2 -4
  3. data/Rakefile +18 -4
  4. data/example/comments.rb +11 -13
  5. data/example/documentation.rb +1 -1
  6. data/example/email_parser.rb +5 -5
  7. data/example/empty.rb +2 -2
  8. data/example/erb.rb +6 -3
  9. data/example/ip_address.rb +2 -2
  10. data/example/local.rb +34 -0
  11. data/example/minilisp.rb +2 -2
  12. data/example/output/comments.out +8 -0
  13. data/example/output/documentation.err +4 -0
  14. data/example/output/documentation.out +1 -0
  15. data/example/output/email_parser.out +2 -0
  16. data/example/output/empty.err +1 -0
  17. data/example/output/erb.out +7 -0
  18. data/example/output/ip_address.out +9 -0
  19. data/example/output/local.out +3 -0
  20. data/example/output/minilisp.out +5 -0
  21. data/example/output/parens.out +8 -0
  22. data/example/output/readme.out +1 -0
  23. data/example/output/seasons.out +28 -0
  24. data/example/output/simple_xml.out +2 -0
  25. data/example/output/string_parser.out +3 -0
  26. data/example/parens.rb +1 -3
  27. data/example/readme.rb +4 -10
  28. data/example/seasons.rb +2 -1
  29. data/example/simple_xml.rb +5 -8
  30. data/example/string_parser.rb +7 -5
  31. data/lib/parslet.rb +20 -31
  32. data/lib/parslet/atoms.rb +1 -0
  33. data/lib/parslet/atoms/base.rb +46 -87
  34. data/lib/parslet/atoms/dsl.rb +98 -0
  35. data/lib/parslet/atoms/entity.rb +3 -4
  36. data/lib/parslet/atoms/lookahead.rb +1 -1
  37. data/lib/parslet/atoms/re.rb +2 -2
  38. data/lib/parslet/atoms/str.rb +5 -2
  39. data/lib/parslet/atoms/transform.rb +75 -0
  40. data/lib/parslet/atoms/visitor.rb +9 -9
  41. data/lib/parslet/convenience.rb +3 -3
  42. data/lib/parslet/export.rb +13 -13
  43. data/lib/parslet/expression/treetop.rb +2 -2
  44. data/lib/parslet/parser.rb +55 -1
  45. data/lib/parslet/rig/rspec.rb +36 -10
  46. data/lib/parslet/slice.rb +172 -0
  47. data/lib/parslet/source.rb +72 -83
  48. data/lib/parslet/source/line_cache.rb +90 -0
  49. metadata +22 -20
@@ -0,0 +1,172 @@
1
+
2
+ # A slice is a small part from the parse input. A slice mainly behaves like
3
+ # any other string, except that it remembers where it came from (offset in
4
+ # original input).
5
+ #
6
+ # Some slices also know what parent slice they are a small part of. This
7
+ # allows the slice to be concatenated to other slices from the same buffer by
8
+ # reslicing it against that original buffer.
9
+ #
10
+ # Why the complexity? Slices allow retaining offset information. This will
11
+ # allow us to assign line and column to each small bit of output from the parslet
12
+ # parser. Also, while we keep that information, we might as well try to do
13
+ # something useful with it. Reslicing the same buffers should in theory keep
14
+ # buffer copies and allocations down.
15
+ #
16
+ # == Extracting line and column
17
+ #
18
+ # Using the #line_and_column method, you can extract the line and column in
19
+ # the original input where this slice starts.
20
+ #
21
+ # Example:
22
+ # slice.line_and_column # => [1, 13]
23
+ # slice.offset # => 12
24
+ #
25
+ # == Likeness to strings
26
+ #
27
+ # Parslet::Slice behaves in many ways like a Ruby String. This likeness
28
+ # however is not complete - many of the myriad of operations String supports
29
+ # are not yet in Slice. You can always extract the internal string instance by
30
+ # calling #to_s.
31
+ #
32
+ # These omissions are somewhat intentional. Rather than maintaining a full
33
+ # delegation, we opt for a partial emulation that gets the job done.
34
+ #
35
+ # Note also that there are some things that work with strings that will never
36
+ # work when using slices. For instance, you cannot concatenate slices that
37
+ # aren't from the same source or that don't join up:
38
+ #
39
+ # Example:
40
+ # big_slice = Parslet::Slice.new('abcdef', 0)
41
+ # a = big_slice.slice(0, 2) # => "ab"@0
42
+ # b = big_slice.slice(4, 2) # => "ef"@4
43
+ #
44
+ # a + b # raises Parslet::InvalidSliceOperation
45
+ #
46
+ # This avoids creating slices with impossible offsets or that are
47
+ # discontinuous.
48
+ #
49
class Parslet::Slice
  attr_reader :str, :offset
  attr_reader :parent
  attr_reader :source

  # Creates a slice covering +string+, whose first character sits at
  # +offset+ in the original input.
  #
  # string:: the characters covered by this slice
  # offset:: position of the first character in the original input
  # source:: object that answers #line_and_column(offset); optional
  # parent:: the bigger slice this one was cut out of; optional
  #
  def initialize(string, offset, source=nil, parent=nil)
    @str, @offset = string, offset
    @source = source
    @parent = parent
  end

  # Compares slices to other slices or strings. Only the string content is
  # compared; offset, source and parent are ignored.
  #
  def ==(other)
    str == other
  end

  # Match regular expressions against the string content.
  #
  def match(regexp)
    str.match(regexp)
  end

  # Returns a slice that starts +start+ characters into this slice and that
  # has +length+ characters. Whenever possible, return parts of the parent
  # buffer that this slice was cut out of.
  #
  def slice(start, length)
    # NOTE: At a later stage, we might not want to create huge trees of
    # slices. The fact that the root of the tree creates slices that link to
    # it makes the tree already rather flat.

    if parent
      # Translate +start+ (relative to this slice) into a position relative
      # to the parent. Leaving +start+ out would always return the beginning
      # of this slice, whatever was asked for.
      parent.slice(offset - parent.offset + start, length)
    else
      self.class.new(str.slice(start, length), offset+start, source, self)
    end
  end

  # Returns a slice that starts at file offset +start+ and that has +length+
  # characters in it.
  #
  def abs_slice(start, length)
    slice(start-offset, length)
  end

  # True if this slice can satisfy an original input request to the
  # range ofs, len.
  #
  def satisfies?(ofs, len)
    ofs >= offset && (ofs-offset+len-1)<str.size
  end

  # Number of characters in this slice.
  #
  def size
    str.size
  end

  # Joins this slice with +other+, which must directly follow it in the same
  # source.
  #
  # Raises ArgumentError if +other+ does not respond to #to_slice and
  # Parslet::InvalidSliceOperation if the slices are not adjacent or stem
  # from different sources.
  #
  def +(other)
    unless other.respond_to?(:to_slice)
      raise ArgumentError,
        "Cannot concat something other than a slice to a slice."
    end

    if offset+size != other.offset
      raise Parslet::InvalidSliceOperation,
        "Cannot join slices that aren't adjacent."+
        " (#{self.inspect} + #{other.inspect})"
    end

    if source != other.source
      raise Parslet::InvalidSliceOperation, "Not from the same source."
    end

    # If both slices stem from the same bigger buffer, we can reslice that
    # buffer to (probably) avoid a buffer copy, as long as the strings are
    # not modified. The parents must be the very same object: #== only
    # compares string content, so two distinct buffers holding equal text
    # would otherwise be mistaken for one and the result would be cut short.
    if parent && parent.equal?(other.parent)
      return parent.abs_slice(offset, size+other.size)
    end

    self.class.new(str + other.str, offset, source)
  end

  # Returns a <line, column> tuple referring to the original input.
  # Raises ArgumentError if the slice was created without a source.
  #
  def line_and_column
    unless source
      raise ArgumentError, "No source was given, cannot infer line and column."
    end

    source.line_and_column(offset)
  end

  # Conversion operators -----------------------------------------------------

  # The string content of this slice.
  def to_str
    str
  end
  alias to_s to_str

  # A slice is its own slice representation.
  def to_slice
    self
  end

  def to_sym
    str.to_sym
  end

  # Strict integer conversion; raises ArgumentError for non-numeric content.
  def to_int
    Integer(str)
  end

  # Lenient integer conversion, as String#to_i.
  def to_i
    str.to_i
  end

  # Lenient float conversion, as String#to_f.
  def to_f
    str.to_f
  end

  # Inspection & Debugging ---------------------------------------------------

  # Prints the slice as <code>"string"@offset</code>.
  def inspect
    str.inspect << "@#{offset}"
  end
end
167
+
168
+ # Raised when trying to do an operation on slices that cannot succeed, like
169
+ # adding non-adjacent slices. See Parslet::Slice.
170
+ #
171
class Parslet::InvalidSliceOperation < StandardError
  # Marker class only: it adds nothing to StandardError, so the message
  # given at the raise site is all the information it carries.
end
@@ -1,120 +1,109 @@
1
1
 
2
2
  require 'stringio'
3
3
 
4
+ require 'parslet/source/line_cache'
5
+
4
6
  # Wraps the input IO to parslet. The interface defined by this class is
5
7
  # smaller than what IO offers, but enhances it with a #column and #line
6
8
  # method for the current position.
7
9
  #
8
10
  class Parslet::Source
9
- attr_reader :line_ends
10
-
11
11
  def initialize(io)
12
12
  if io.respond_to? :to_str
13
13
  io = StringIO.new(io)
14
14
  end
15
15
 
16
16
  @io = io
17
- warn "Line counting will be off if the IO is not rewound." unless @io.pos==0
17
+ @virtual_position = @io.pos
18
+ @eof_position = nil
18
19
 
19
- # Stores line endings as a simple position number. The first line always
20
- # starts at 0; numbers beyond the biggest entry are on any line > size,
21
- # but probably make a scan to that position neccessary.
22
- @line_ends = []
23
- @line_ends.extend RangeSearch
20
+ @line_cache = LineCache.new
21
+
22
+ # Stores an array of <offset, buffer> tuples.
23
+ @slices = []
24
24
  end
25
25
 
26
+ # Reads n chars from the input and returns a Range instance.
27
+ #
26
28
  def read(n)
27
- start_pos = pos
28
- @io.read(n).tap { |buf| scan_for_line_endings(start_pos, buf) }
29
+ slice = read_from_cache(@virtual_position, n)
30
+ @virtual_position += slice.size
31
+
32
+ slice
29
33
  end
30
34
 
31
35
  def eof?
32
- @io.eof?
36
+ @eof_position && @virtual_position >= @eof_position
33
37
  end
34
-
35
38
  def pos
36
- @io.pos
39
+ @virtual_position
37
40
  end
38
-
39
- # NOTE: If you seek beyond the point that you last read, you will get
40
- # undefined behaviour. This is by design.
41
41
  def pos=(new_pos)
42
- @io.pos = new_pos
43
- end
44
-
45
- def line_and_column(position=nil)
46
- pos = (position || self.pos)
47
- eol_idx = @line_ends.lbound(pos)
48
-
49
- if eol_idx
50
- # eol_idx points to the offset that ends the current line.
51
- # Let's try to find the offset that starts it:
52
- offset = eol_idx>0 && @line_ends[eol_idx-1] || 0
53
- return [eol_idx+1, pos-offset+1]
54
- else
55
- # eol_idx is nil, that means that we're beyond the last line end that
56
- # we know about. Pretend for now that we're just on the last line.
57
- offset = @line_ends.last || 0
58
- return [@line_ends.size+1, pos-offset+1]
59
- end
42
+ @virtual_position = new_pos
60
43
  end
61
44
 
62
- # Mixin for arrays that implicitly give a number of ranges, where one range
63
- # begins where the other one ends.
64
- #
65
- # Example:
66
- #
67
- # [10, 20, 30]
68
- # # would describe [0, 10], (10, 20], (20, 30]
45
+ # Returns a <line, column> tuple for the given position. If no position is
46
+ # given, line/column information is returned for the current position given
47
+ # by #pos.
69
48
  #
70
- module RangeSearch
71
- # Scans the array for the first number that is > than bound. Returns the
72
- # index of that number.
73
- #
74
- def lbound(bound)
75
- return nil if empty?
76
- return nil unless last > bound
77
-
78
- left = 0
79
- right = size - 1
80
-
81
- n = 10
82
- loop do
83
- mid = left + (right - left) / 2
84
-
85
- if self[mid] > bound
86
- right = mid
87
- else
88
- # assert: self[mid] <= bound
89
- left = mid+1
90
- end
91
-
92
- if right <= left
93
- return right
94
- end
95
- end
96
- end
49
+ def line_and_column(position=nil)
50
+ @line_cache.line_and_column(position || self.pos)
97
51
  end
98
52
 
99
53
  private
54
+ # Minimal size of a single read
55
+ MIN_READ_SIZE = 10 * 1024
56
+ # Number of slices to keep
57
+ BUFFER_CACHE_SIZE = 10
58
+
59
+ # Reads and returns a piece of the input that contains length chars starting
60
+ # at offset.
61
+ #
62
+ def read_from_cache(offset, length)
63
+ # Do we already have a buffer that contains the given range?
64
+ # Return that.
65
+ slice = @slices.find { |slice|
66
+ slice.satisfies?(offset, length) }
67
+ return slice.abs_slice(offset, length) if slice
68
+
69
+ # Read a new buffer: Can the demand be satisfied by sequentially reading
70
+ # from the current position?
71
+ needed = offset-@io.pos+length
72
+ if @io.pos <= offset && needed<MIN_READ_SIZE
73
+ # read the slice
74
+ slice = read_slice(needed)
75
+ return slice.abs_slice(offset, length)
76
+ end
77
+
78
+ # Otherwise seek and read enough so that we can satisfy the demand.
79
+ @io.pos = offset
100
80
 
101
- def scan_for_line_endings(start_pos, buf)
102
- return unless buf
103
- return unless buf.index("\n")
104
- cur = -1
81
+ slice = read_slice(needed)
82
+ return slice.abs_slice(offset, length)
83
+ end
105
84
 
106
- # If we have already read part or all of buf, we already know about
107
- # line ends in that portion. remove it and correct cur (search index)
108
- if @last_line_end && start_pos < @last_line_end
109
- # Let's not search the range from start_pos to last_line_end again.
110
- cur = @last_line_end - start_pos -1
85
+ def read_slice(needed)
86
+ start = @io.pos
87
+ request = [MIN_READ_SIZE, needed].max
88
+ buf = @io.read(request)
89
+
90
+ # remember eof position
91
+ if !buf || buf.size<request
92
+ @eof_position = @io.pos
111
93
  end
94
+
95
+ # cache line ends
96
+ @line_cache.scan_for_line_endings(start, buf)
97
+
98
+ slice = Parslet::Slice.new(buf || '', start, self)
99
+
100
+ # Don't cache empty slices.
101
+ return slice unless buf
102
+
103
+ # cache the buffer (and eject old entries)
104
+ @slices << slice
105
+ @slices.shift if @slices.size > BUFFER_CACHE_SIZE
112
106
 
113
- # Scan the string for line endings; store the positions of all endings
114
- # in @line_ends.
115
- while buf && cur = buf.index("\n", cur+1)
116
- @last_line_end = (start_pos + cur+1)
117
- @line_ends << @last_line_end
118
- end
107
+ slice
119
108
  end
120
109
  end
@@ -0,0 +1,90 @@
1
+
2
+
3
+ class Parslet::Source
4
+ # A cache for line start positions.
5
+ #
6
class LineCache # :nodoc:
  def initialize
    # Absolute positions of the character following each newline seen so
    # far, in ascending order. Positions beyond the biggest entry lie on a
    # later line; a scan up to that position may be necessary to tell
    # which one.
    @line_ends = [].extend(RangeSearch)
  end

  # Returns a <line, column> tuple (both 1-based) for the given input
  # position.
  #
  def line_and_column(pos)
    line_idx = @line_ends.lbound(pos)

    if line_idx.nil?
      # We're beyond the last line end that we know about. Pretend for now
      # that we're just on the last line.
      line_start = @line_ends.last || 0
      [@line_ends.size + 1, pos - line_start + 1]
    else
      # line_idx points to the entry that ends the current line; the entry
      # before it (if any) is where the current line starts.
      line_start = line_idx.zero? ? 0 : @line_ends[line_idx - 1]
      [line_idx + 1, pos - line_start + 1]
    end
  end

  # Records the line endings found in buf, which was read from the input
  # at start_pos. Parts of buf that lie before the last recorded line end
  # are not searched again.
  #
  def scan_for_line_endings(start_pos, buf)
    return unless buf && buf.index("\n")

    # Index into buf at which searching begins.
    search_from = 0
    if @last_line_end && start_pos < @last_line_end
      search_from = @last_line_end - start_pos
    end

    while (hit = buf.index("\n", search_from))
      @last_line_end = start_pos + hit + 1
      @line_ends << @last_line_end
      search_from = hit + 1
    end
  end
end
53
+
54
+ # Mixin for arrays that implicitly give a number of ranges, where one range
55
+ # begins where the other one ends.
56
+ #
57
+ # Example:
58
+ #
59
+ # [10, 20, 30]
60
+ # # would describe [0, 10], (10, 20], (20, 30]
61
+ #
62
module RangeSearch # :nodoc:
  # Scans the array for the first number that is > than bound. Returns the
  # index of that number, or nil if the array is empty or holds no number
  # greater than bound. The array must be sorted in ascending order, since
  # the lookup is a binary search.
  #
  def lbound(bound)
    return nil if empty?
    return nil unless last > bound

    left = 0
    right = size - 1

    # Invariant: self[right] > bound. It holds initially because
    # last > bound, and right only ever moves onto such an element.
    while left < right
      mid = left + (right - left) / 2

      if self[mid] > bound
        right = mid
      else
        # self[mid] <= bound, the answer lies to the right of mid
        left = mid + 1
      end
    end

    right
  end
end
90
+ end