positionrange 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +3 -0
- data/LICENSE.txt +662 -0
- data/README.txt +87 -0
- data/Rakefile +98 -0
- data/install.rb +30 -0
- data/lib/position_range/error.rb +28 -0
- data/lib/position_range/list.rb +505 -0
- data/lib/position_range/version.rb +9 -0
- data/lib/position_range.rb +249 -0
- data/lib/positionrange.rb +1 -0
- data/test/position_range_list_test.rb +449 -0
- data/test/position_range_test.rb +146 -0
- data/test/test_helper.rb +3 -0
- metadata +66 -0
@@ -0,0 +1,505 @@
|
|
1
|
+
#--#
|
2
|
+
# Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
|
3
|
+
#
|
4
|
+
# License:
|
5
|
+
# This file is part of the PositionRange Library. PositionRange is Free
|
6
|
+
# Software. You can run/distribute/modify PositionRange under the terms of
|
7
|
+
# the GNU Affero General Public License version 3. The Affero GPL states
|
8
|
+
# that running a modified version or a derivative work also requires you to
|
9
|
+
# make the sourcecode of that work available to everyone that can interact
|
10
|
+
# with it. We chose the Affero GPL to ensure that PositionRange remains open
|
11
|
+
# and libre (LICENSE.txt contains the full text of the legally binding
|
12
|
+
# license).
|
13
|
+
#++#
|
14
|
+
#
|
15
|
+
# Keeps a list of PositionRanges.
|
16
|
+
#
|
17
|
+
# Supports basic set operations, as well as many others, like
|
18
|
+
# clustering overlaps and getting sizes.
|
19
|
+
|
20
|
+
class PositionRange::List < Array
|
21
|
+
|
22
|
+
###### Regular expressions
|
23
|
+
|
24
|
+
# Check-regexps
#
# Matches either an empty string, or one or more
# PositionRange::BLOCK_POSITION_RANGE patterns separated by colons.
#
# NOTE(review): ^ and $ anchor per line in Ruby, so a multi-line string
# matches as soon as one of its lines (even an empty one) matches. \A and
# \z would anchor the whole string -- confirm that no caller depends on
# newline-terminated input before tightening this.
CHECK_POSITION_RANGE_LIST_RE =
  /^(#{PositionRange::BLOCK_POSITION_RANGE}(\:#{PositionRange::BLOCK_POSITION_RANGE})*)?$/
|
27
|
+
|
28
|
+
###### Class methods
|
29
|
+
|
30
|
+
# Builds a PositionRange::List out of its string representation.
#
# Syntax:
#   <position range string>[:<position range string>]*
#
# A nil (or false) argument yields an empty list. A string that does not
# match CHECK_POSITION_RANGE_LIST_RE raises a StandardError.
#
# Options:
# pass_on_options is handed on unchanged to PositionRange.from_s for
# every colon-separated part of the string.
#
def self.from_s(position_range_list_string, pass_on_options = {})
  return PositionRange::List.new unless position_range_list_string

  unless position_range_list_string =~ CHECK_POSITION_RANGE_LIST_RE
    raise StandardError.new(), 'Invalid position_range_list string given: ' +
        position_range_list_string
  end

  parsed = PositionRange::List.new
  position_range_list_string.split(':').each do |part|
    parsed.push(PositionRange.from_s(part, pass_on_options))
  end
  return parsed
end
|
56
|
+
|
57
|
+
# Returns a new PositionRange::List holding a single PositionRange that
# runs from 0 to string.size, or an empty list when string.size is 0.
#
# Only #size is called on the argument, so an array works as well.
#
def self.new_around(string)
  covering = PositionRange::List.new
  covering.push(PositionRange.new(0, string.size)) if string.size > 0
  return covering
end
|
67
|
+
|
68
|
+
###### Methods
|
69
|
+
|
70
|
+
### Low level methods
|
71
|
+
|
72
|
+
# Checking, ranges, etc
|
73
|
+
|
74
|
+
# Returns the sum of the sizes of all ranges in this list (0 for an
# empty list).
#
def range_size
  return self.inject(0) { |total, range| total + range.size }
end
|
83
|
+
|
84
|
+
# Returns true if every PositionRange in this list lies within the
# range from 0 to size, false otherwise.
#
# Implemented through within?, so attributes are ignored.
#
def below?(size)
  ceiling = PositionRange::List.new([PositionRange.new(0, size)])
  return self.within?(ceiling)
end
|
93
|
+
|
94
|
+
# Returns true if nothing is left of a copy of this list after
# substracting the given other PositionRange::List from it, that is, if
# all of this list is covered by other. Otherwise false.
#
# Attributes are ignored.
#
def within?(other)
  leftover = self.dup.substract!(other, :ignore_attributes => true)
  return leftover.empty?
end
|
106
|
+
|
107
|
+
# Returns the index of the first element equal to the given
# PositionRange, or nil if there is none.
#
# Options
# <tt>:dont_ignore_attributes</tt> => true, additionally requires
# has_equal_pointer_attributes? to hold between the given range and the
# element; defaults to false, in which case Array#index is used.
#
def index(position_range, options = {})
  return super(position_range) unless options[:dont_ignore_attributes]

  self.each_with_index do |candidate, position|
    next unless position_range == candidate
    return position if position_range.has_equal_pointer_attributes?(candidate)
  end
  return nil
end
|
125
|
+
|
126
|
+
### Low level operations
|
127
|
+
|
128
|
+
# Applies an intersection in the sense of Set theory and returns the
# result as a new list; self and other are left untouched.
#
# Everything that falls outside the PositionRanges of other is removed.
# This is done by inverting a copy of other and substracting that from a
# copy of self, ignoring attributes.
#
# Example:
# '1,5:7,8:10,12' becomes '2,5:11,12' after limiting to '2,6:11,40'
#
def &(other)
  return self.dup.substract!(other.dup.invert!, :ignore_attributes => true)
end
|
140
|
+
|
141
|
+
# Non-destructive substraction in the sense of Set theory: runs
# substract! on a copy of this list and returns the outcome.
#
# See substract!
#
def -(other)
  difference = self.dup
  return difference.substract!(other)
end
|
148
|
+
|
149
|
+
# Applies a substraction in the sense of Set theory.
#
# It removes all PositionRanges and parts of PositionRanges that overlap
# with the PositionRanges given as the other. Self is modified in place
# and returned; other itself is not modified (a sorted, merged copy of it
# is used).
#
# So for example:
# '1,5:7,9:11,12' becomes '1,4:7,8:11,12' after substracting '4,6:8,9'
#
# Only substracts PositionRanges if all their attributes (except for first
# and last) are the same, unless ignore_attributes is specified.
#
# Options
# <tt>:ignore_attributes</tt> => Ignores attributes
#
def substract!(other,options = {})
  ignore_attributes = options[:ignore_attributes]

  # sorted_self is a sorted copy that is scanned from left to right; every
  # change made to it is mirrored on self, which keeps its own ordering.
  sorted_self = self.sort
  if sorted_self.size > 0 and other.size > 0
    # dup + sort! instead of sort: Array#sort hands back a plain Array on
    # Ruby 1.9 and later, which does not respond to merge_adjacents!.
    other = other.dup.sort!.merge_adjacents!

    last_i = 0
    other.each do |p_r|
      # skip everything that ends before p_r begins; other is sorted, so
      # the next p_r can resume from here
      i = last_i
      while sorted_self[i] and sorted_self[i].end < p_r.begin
        i += 1
      end
      last_i = i
      while sorted_self[i] and sorted_self[i].begin < p_r.end
        if ignore_attributes or sorted_self[i].has_equal_pointer_attributes?(p_r)
          # position of the same range in the (unsorted) self
          self_i = self.index(sorted_self[i], :dont_ignore_attributes => !ignore_attributes)
          if sorted_self[i].begin < p_r.begin
            # starts before p_r: split at p_r.begin, keep the left part and
            # let the next pass of this loop deal with the right part
            copy = sorted_self[i].dup
            sorted_self[i] = copy.new_dup(copy.begin, p_r.begin)
            self[self_i] = sorted_self[i]
            sorted_self.insert(i + 1, copy.new_dup(p_r.begin, copy.end))
            self.insert(self_i + 1, sorted_self[i + 1])
            i += 1
          elsif sorted_self[i].end <= p_r.end
            # lies completely inside p_r: drop it
            sorted_self.delete_at(i)
            self.delete_at(self_i)
          else
            # sticks out behind p_r: keep only the part after p_r.end
            sorted_self[i] = sorted_self[i].new_dup(
                p_r.end, sorted_self[i].end)
            self[self_i] = sorted_self[i]
          end
        else
          i += 1
        end
      end
    end
  end
  return self
end
|
203
|
+
|
204
|
+
# Removes the positions covered by the given position_range from this
# list, by substracting a one-element list holding it (see substract!).
#
def delete(p_r)
  single = PositionRange::List.new([p_r])
  return self.substract!(single)
end
|
209
|
+
|
210
|
+
# Inverts the list in place: positions that were included become
# excluded, and positions that were excluded become included, for the
# positions from 0 up to maximum_size. Returns self.
#
# An empty list becomes a single range from 0 to maximum_size (or stays
# empty when maximum_size is not positive).
#
# Raises PositionRange::Error if the last range (after sorting) ends
# beyond maximum_size.
#
# NOTE: new ranges are created as PositionRanges, so references to
# objects or ordering_positions of subclasses are not maintained, as
# they are meaningless for inverted lists of ranges.
#
# NOTE: Also that self is sorted.
#
def invert!(maximum_size = PositionRange::MaximumSize)
  if self.size > 0
    # sorts and prevents problems with adjacent ranges
    self.sort!.merge_adjacents!
    if self[-1].end > maximum_size
      raise PositionRange::Error.new(self[-1].begin, self[-1].end),
          'PositionRange larger than the maximum'
    end
    start_point = 0
    if self[0].begin > 0
      # the gap in front of the first range becomes the first range
      self.insert(0, PositionRange.new(0, self[0].begin))
      start_point += 1
    end
    # every range but the last is replaced by the gap that follows it
    (start_point...(self.size - 1)).each {|i|
      self[i] = PositionRange.new(self[i].end, self[i + 1].begin)
    }
    # the last range is replaced by the gap up to maximum_size, if there
    # is one; comparing against maximum_size itself (not maximum_size - 1)
    # keeps a trailing gap of size 1
    if self[-1].end < maximum_size
      self[-1] = PositionRange.new(self[-1].end, maximum_size)
    else
      self.delete_at(-1)
    end
  elsif maximum_size > 0
    self.push(PositionRange.new(0, maximum_size))
  end
  return self
end
|
248
|
+
|
249
|
+
# Splits overlapping PositionRanges in place until any two ranges that
# overlap have exactly the same begin and end. Returns self, sorted.
#
# The guaranteed situation after calling this method:
# * Multiple PositionRanges can refer to the same ranges, but if
#   they do they will have the same begin and end position.
# * All positions associated with an object (a Link or an Authorship
#   for example) will still be associated with that same object, but
#   possibly through a different or a new PositionRange.
#
# Example:
# '3,7->a:5,9->b' lined up will be '3,5->a:5,7->a:5,7->b:7,9->b'
#
# Where the ->X indicates an association with object X
#
# This is used for simplifying PositionRanges for parsing Links into
# Logis.
#
def line_up_overlaps!
  self.sort!.merge_adjacents!
  # note that the merging and the sorting done here is relied upon to
  # keep the PositionRanges sorted by begin-position AND size (short to
  # long).
  # NOTE(review): that ordering comes from PositionRange#<=>, which is
  # not defined in this file -- confirm there.
  i = 0
  while i < (self.size - 1)
    if self[i].end > self[i + 1].begin
      # found an overlap
      if self[i].begin != self[i + 1].begin
        # the beginnings are not lined up, so align them: cut self[i] in
        # two at the beginning of its neighbour
        self.insert(i + 1, self[i].new_dup(self[i + 1].begin, self[i].end))
        self[i] = self[i].new_dup(self[i].begin, self[i + 1].begin)
        # i = -1 becomes 0 through the i += 1 at the bottom of the loop
        i = -1; self.sort! # restart in case more than 1 overlap
      elsif self[i].end != self[i + 1].end
        # the beginnings are already lined up, now do the ends
        if self[i].end < self[i + 1].end
          # i is the shortest, so self[i].end is used
          self.insert(i + 2, self[i + 1].new_dup(self[i].end, self[i + 1].end))
          self[i + 1] = self[i + 1].new_dup(self[i + 1].begin, self[i].end)
        else
          # i + 1 is the shortest, so self[i + 1].end is used
          self.insert(i + 2, self[i].new_dup(self[i + 1].end, self[i].end))
          self[i] = self[i].new_dup(self[i].begin, self[i + 1].end)
        end
        i = -1; self.sort! # restart in case more than 1 overlap
      end
    end
    i += 1
  end
  return self
end
|
299
|
+
|
300
|
+
# Simplifies the PositionRange::List in place by merging PositionRanges
# that directly follow each other in the list and where the first ends
# exactly where the second begins. Returns self.
#
# Example:
# 1,4:4,7:10,11 => 1,7:10,11
#
# Only merges adjacent PositionRanges if all their attributes
# (except for first and last) are the same.
#
# Options
# <tt>:ignore_attributes</tt> => merges regardless of attributes
#
def merge_adjacents!(options = {})
  ignore_attributes = options[:ignore_attributes]
  # start at 1: with i = 0, self[i - 1] would be the last element, and
  # the end of the list would get merged into its beginning
  i = 1
  while i < self.size
    previous = self[i - 1]
    current = self[i]
    if previous.end == current.begin &&
        (ignore_attributes || previous.has_equal_pointer_attributes?(current))
      self[i - 1] = previous.new_dup(previous.begin, current.end)
      # i is not advanced, so the merged range is compared with what
      # follows next
      self.delete_at(i)
    else
      i += 1
    end
  end
  return self
end
|
324
|
+
|
325
|
+
# Moves every PositionRange in the list by the given amount, in place:
# each element is replaced by a new_dup of itself with integer added to
# both its first and its last. Returns self.
#
# Raises a StandardError if the argument is not an Integer.
#
def translate!(integer)
  unless integer.kind_of?(Integer)
    raise StandardError.new, 'Tried to translate a PositionRange::List with a non-integer'
  end
  self.map! do |p_r|
    p_r.new_dup(p_r.first + integer, p_r.last + integer)
  end
  return self
end
|
336
|
+
|
337
|
+
# The ranges_to_insert are inserted at the ranges_at_which_to_insert
# of this list, counted in range_size from its beginning, and
# interluded with ranges_to_skip. Self is modified in place and returned.
#
# So PositionRange::List.from_s('39,49:16,20').insert_at_ranges!(
#        PositionRange::List.from_s('100,102:6,7'),
#        PositionRange::List.from_s('10,12:19,20'),
#        PositionRange::List.from_s('12,19'))
#
# will result in:
#    PositionRange::List.from_s('39,49:100,102:6,7:16,20')
#
# Raises a StandardError if ranges_to_insert and
# ranges_at_which_to_insert differ in range_size.
#
# NOTE: ranges_at_which_to_insert is modified: the action of each of its
# elements is set to :ins, ranges_to_skip is appended to it, and it is
# sorted in place.
#
def insert_at_ranges!(ranges_to_insert, ranges_at_which_to_insert,
    ranges_to_skip = [])
  if ranges_to_insert.range_size != ranges_at_which_to_insert.range_size
    raise StandardError, 'Ranges to insert, and at which to insert are ' +
        'of different range_sizes: ' + ranges_to_insert.to_s + ', ' +
        ranges_at_which_to_insert.to_s
  end
  # mark the insertion points, then walk them together with the ranges to
  # skip in sorted order
  ranges_to_act = ranges_at_which_to_insert.each {|p_r| p_r.action = :ins}.concat(
      ranges_to_skip).sort!

  i = -1      # index in self of the last element counted into self_p
  self_p = 0  # running position, counted in range_size
  ins_p = 0   # index of the next element of ranges_to_insert to use
  ranges_to_act.each {|p_r|
    # advance through self until the running position reaches p_r
    # NOTE(review): the "- 1" in this bound is not explained by the
    # visible code -- confirm against the tests before touching it.
    while self_p < p_r.begin - 1
      i += 1
      self_p += self[i].size
    end
    if self_p > p_r.begin
      # overshot: p_r begins inside self[i], so split that range at the
      # corresponding point
      copy = self[i]
      cut = copy.end + p_r.begin - self_p
      self[i] = copy.new_dup(copy.begin, cut)
      self.insert(i + 1, copy.new_dup(cut, copy.end))
      self_p = p_r.begin
    end
    if p_r.action == :ins
      # take ranges from ranges_to_insert until the size of p_r is filled
      inner_p = 0
      while inner_p < p_r.size
        self.insert(i + 1, ranges_to_insert[ins_p])
        inner_p += ranges_to_insert[ins_p].size
        i += 1
        ins_p += 1
      end
    end
    self_p += p_r.size
  }
  return self
end
|
387
|
+
|
388
|
+
### Highlevel methods
|
389
|
+
|
390
|
+
# Translates the PositionRange::List into the relative space defined
# by the view_position_range_list, and returns the result as a new list.
#
# For each snippet of the view, the part of this list that falls inside
# the snippet is moved to where that snippet starts when all snippets
# are laid end to end from 0. Adjacent results are merged.
#
def translate_to_view(view_position_range_list)
  view_offset = 0
  in_view = PositionRange::List.new
  view_position_range_list.each do |snippet|
    visible = self & PositionRange::List.new([snippet])
    in_view.concat(visible.translate!(view_offset - snippet.first))
    view_offset += snippet.size
  end
  in_view.merge_adjacents!
  return in_view
end
|
405
|
+
|
406
|
+
# Translates the PositionRange::List from the relative space defined by
# the view_position_range_list into absolute space, and returns the
# result as a new list. This is the reverse of translate_to_view.
#
# The snippets of the view are taken to be laid end to end from 0; the
# part of this list that falls inside each such window is moved back to
# where the snippet really begins. Adjacent results are merged.
#
def translate_from_view(view_position_range_list)
  view_offset = 0
  in_absolute = PositionRange::List.new
  view_position_range_list.each do |snippet|
    window = PositionRange.new(view_offset, view_offset + snippet.size)
    visible = self & PositionRange::List.new([window])
    in_absolute.concat(visible.translate!(snippet.first - view_offset))
    view_offset += snippet.size
  end
  in_absolute.merge_adjacents!
  return in_absolute
end
|
421
|
+
|
422
|
+
# Stacks the PositionRanges in the List adjacent in a new
# PositionRange::List, starting at 0, while maintaining their size and
# their order. Self is not modified.
#
# So PositionRange::List.from_s('50,53:11,30').stack_adjacent
# returns: PositionRange::List.from_s('0,3:3,22')
#
# and with :space => 1 it
# returns: PositionRange::List.from_s('0,3:4,23')
#
# NOTE: the new ranges are created with PositionRange.new from positions
# only, so nothing else of the original ranges is carried over.
#
# Options
# <tt>:space</tt> => The space to leave inbetween, defaults to 0
#
def stack_adjacent(options = {})
  space = options[:space] || 0
  adjacent = PositionRange::List.new
  adjacent_p = 0
  self.each do |p_r|
    step = p_r.size
    adjacent << PositionRange.new(adjacent_p, adjacent_p + step)
    adjacent_p += step + space
  end
  return adjacent
end
|
442
|
+
|
443
|
+
# Groups the PositionRanges of a lined-up copy of this list (see
# line_up_overlaps!) into clusters: consecutive ranges that are equal to
# each other end up in the same PositionRange::List.
#
# So PositionRange::List.from_s('1,2:1,2:10,18:14,18').cluster_overlaps will
# get you a cluster arr equal to the following:
#
# [PositionRange::List.from_s('1,2:1,2'),
#  PositionRange::List.from_s('10,14'),
#  PositionRange::List.from_s('14,18:14,18')]
#
# Except that the pointer_attributes are of course kept in order
#
# For an empty list an (empty) copy of the list itself is returned.
#
def cluster_overlaps
  return self.dup if self.empty?

  remaining = self.dup.line_up_overlaps!
  clusters = [PositionRange::List.new().push(remaining.shift)]
  remaining.each do |p_r|
    current = clusters.last
    if p_r == current[0]
      current.push(p_r)
    else
      clusters.push(PositionRange::List.new([p_r]))
    end
  end
  return clusters
end
|
471
|
+
|
472
|
+
# Returns a new string containing only the parts of the given string
# designated by the PositionRanges in this list.
#
# The parts (string[position_range]) are joined in the order in which
# the ranges are found in this list.
#
# Raises a StandardError if a range ends beyond string.size.
#
# Options
# <tt>:separator</tt> => The string to insert between the parts,
# defaults to the empty string
#
def apply_to_string(string, options = {})
  separator = options[:separator] || ''
  parts = self.map do |p_r|
    if p_r.end > string.size
      raise StandardError, 'End-range bigger than string'
    end
    string[p_r]
  end
  # join puts the separator between the parts only, so no trailing
  # separator has to be cut off afterwards
  return parts.join(separator)
end
|
492
|
+
|
493
|
+
### Parsing methods
|
494
|
+
|
495
|
+
# Returns the string representation of the PositionRange::List: the
# to_s of each PositionRange, in list order, separated by colons. An
# empty list gives an empty string.
#
# The list is neither sorted nor otherwise modified.
#
def to_s
  return self.map { |p_r| p_r.to_s }.join(':')
end
|
505
|
+
end
|