positionrange 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,505 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the PositionRange Library. PositionRange is Free
6
+ # Software. You can run/distribute/modify PositionRange under the terms of
7
+ # the GNU Affero General Public License version 3. The Affero GPL states
8
+ # that running a modified version or a derivative work also requires you to
9
+ # make the sourcecode of that work available to everyone that can interact
10
+ # with it. We chose the Affero GPL to ensure that PositionRange remains open
11
+ # and libre (LICENSE.txt contains the full text of the legally binding
12
+ # license).
13
+ #++#
14
+ #
15
+ # Keeps a list of PositionRanges.
16
+ #
17
+ # Supports basic set operations, as well as many others, like
18
+ # clustering overlaps and getting sizes.
19
+
20
+ class PositionRange::List < Array
21
+
22
+ ###### Regular expressions
23
+
24
# Check-regexps
#
# Matches either an empty string or one or more position range strings
# joined by ':'.
#
# Anchored with \A and \Z instead of ^ and $: the latter anchor at every
# line, so a multi-line string containing a single valid line would pass
# the check. \Z still tolerates one trailing newline, like $ did.
CHECK_POSITION_RANGE_LIST_RE =
  /\A(#{PositionRange::BLOCK_POSITION_RANGE}(\:#{PositionRange::BLOCK_POSITION_RANGE})*)?\Z/
27
+
28
+ ###### Class methods
29
+
30
# Parses a list of PositionRanges from a string.
#
# Syntax:
#   <position range string>[:<position range string>]*
#
# A nil (or false) argument yields an empty list. A string that does not
# match CHECK_POSITION_RANGE_LIST_RE raises a StandardError.
#
# Options:
#   The argument pass_on_options allows you to give options to be
#   passed on to the PositionRanges created from the string
#
def self.from_s(position_range_list_string, pass_on_options = {})
  return PositionRange::List.new unless position_range_list_string

  unless position_range_list_string =~ CHECK_POSITION_RANGE_LIST_RE
    raise StandardError, 'Invalid position_range_list string given: ' +
        position_range_list_string
  end

  parsed = PositionRange::List.new
  position_range_list_string.split(':').each do |piece|
    parsed << PositionRange.from_s(piece, pass_on_options)
  end
  parsed
end
56
+
57
# Builds a PositionRange::List holding one PositionRange from 0 up to the
# size of the given string (anything responding to size works, e.g. an
# array). An input of size zero gives an empty list.
#
def self.new_around(string)
  return PositionRange::List.new unless string.size > 0

  covering = PositionRange.new(0, string.size)
  PositionRange::List.new([covering])
end
67
+
68
+ ###### Methods
69
+
70
+ ### Low level methods
71
+
72
+ # Checking, ranges, etc
73
+
74
# Sums the size of every range in this list; 0 for an empty list.
#
def range_size
  inject(0) { |total, range| total + range.size }
end
83
+
84
# Tells whether every PositionRange in this list stays within the range
# running from 0 to size (checked through within?).
#
# Attributes are ignored.
#
def below?(size)
  allowed = PositionRange::List.new([PositionRange.new(0, size)])
  within?(allowed)
end
93
+
94
# Tells whether all PositionRanges in this list fall within the
# PositionRanges of the given other PositionRange::List: true when
# nothing is left after substracting other from a copy of this list.
#
# Attributes are ignored.
#
def within?(other)
  leftover = dup.substract!(other, :ignore_attributes => true)
  leftover.empty?
end
106
+
107
# Returns the index of the given PositionRange, or nil when it is absent.
#
# Options
# <tt>:dont_ignore_attributes</tt> => true, finds the first entry that is
#                  == to the given range and also has equal
#                  pointer attributes; defaults to false, in which
#                  case the lookup is delegated to Array#index
#
def index(position_range, options = {})
  return super(position_range) unless options[:dont_ignore_attributes]

  each_with_index do |candidate, position|
    next unless position_range == candidate
    return position if position_range.has_equal_pointer_attributes?(candidate)
  end
  nil
end
125
+
126
+ ### Low level operations
127
+
128
# Applies an intersection in the sense of Set theory.
#
# All PositionRanges and parts of PositionRanges that fall outside
# the PositionRanges given in other are removed. This is done by
# inverting a copy of other and substracting that from a copy of this
# list, ignoring attributes. Neither operand is modified.
#
# Example:
#   '1,5:7,8:10,12' becomes '2,5:11,12' after limiting to '2,6:11,40'
#
def &(other)
  outside_other = other.dup.invert!
  dup.substract!(outside_other, :ignore_attributes => true)
end
140
+
141
# Applies a substraction in the sense of Set theory, on a copy of this
# list (the receiver itself is left untouched).
#
# See substract!
#
def -(other)
  remainder = dup
  remainder.substract!(other)
end
148
+
149
# Applies a substraction in the sense of Set theory.
#
# It removes all PositionRanges and parts of PositionRanges that overlap with the
# PositionRanges given as the other.
#
# So for example:
#   '1,5:7,9:11,12' becomes '1,4:7,8:11,12' after substracting '4,6:8,9'
#
# Only substracts PositionRanges if all their attributes (except for first and
# last) are the same, unless ignore_attributes is specified.
#
# The list is modified in place and returned; other is not modified.
#
# Options
# <tt>:ignore_attributes</tt> => Ignores attributes
#
def substract!(other, options = {})
  ignore_attributes = options[:ignore_attributes]

  # A sorted shadow copy is walked; every change is mirrored into self at
  # the matching position so that self keeps its own ordering.
  sorted_self = self.sort
  return self unless sorted_self.size > 0 && other.size > 0

  # Array#sort hands back a plain Array on current Rubies, which has no
  # merge_adjacents!. Sorting a List copy in place keeps the List class
  # and leaves the caller's list alone.
  subtrahends = PositionRange::List.new(other).sort!.merge_adjacents!

  resume_at = 0
  subtrahends.each do |p_r|
    # skip everything that ends before this subtrahend begins
    i = resume_at
    i += 1 while sorted_self[i] && sorted_self[i].end < p_r.begin
    resume_at = i

    while sorted_self[i] && sorted_self[i].begin < p_r.end
      current = sorted_self[i]
      unless ignore_attributes || current.has_equal_pointer_attributes?(p_r)
        i += 1
        next
      end

      self_i = self.index(current, :dont_ignore_attributes => !ignore_attributes)
      if current.begin < p_r.begin
        # starts before the subtrahend: keep the head, and queue the rest
        # as a separate entry that the next iteration deals with
        copy = current.dup
        sorted_self[i] = copy.new_dup(copy.begin, p_r.begin)
        self[self_i] = sorted_self[i]
        sorted_self.insert(i + 1, copy.new_dup(p_r.begin, copy.end))
        self.insert(self_i + 1, sorted_self[i + 1])
        i += 1
      elsif current.end <= p_r.end
        # fully covered by the subtrahend: drop it
        sorted_self.delete_at(i)
        self.delete_at(self_i)
      else
        # sticks out behind the subtrahend: keep only the tail
        sorted_self[i] = current.new_dup(p_r.end, current.end)
        self[self_i] = sorted_self[i]
      end
    end
  end
  self
end
203
+
204
# Deletes the position_range that is specified, by substracting a
# one-element list holding it from this list (see substract!).
#
def delete(p_r)
  single = PositionRange::List.new([p_r])
  substract!(single)
end
209
+
210
# Results in all positions being included, being excluded now, and
# all positions that were excluded, being included now, up to
# maximum_size.
#
# An empty list becomes a single range from 0 to maximum_size (or stays
# empty when maximum_size is not positive). Raises a PositionRange::Error
# when the last range ends beyond maximum_size.
#
# NOTE: new ranges are created as PositionRanges, so references to
# objects or ordering_positions of subclasses are not maintained, as
# they are meaningless for inverted lists of ranges.
#
# NOTE: Also that self is sorted.
#
def invert!(maximum_size = PositionRange::MaximumSize)
  if empty?
    push(PositionRange.new(0, maximum_size)) if maximum_size > 0
    return self
  end

  # sorts and prevents problems with adjacent ranges
  sort!.merge_adjacents!
  if self[-1].end > maximum_size
    raise PositionRange::Error.new(self[-1].begin, self[-1].end),
        'PositionRange larger than the maximum'
  end

  # leading gap, in front of the first range
  start_point = 0
  if self[0].begin > 0
    insert(0, PositionRange.new(0, self[0].begin))
    start_point = 1
  end

  # every range but the last is replaced by the gap that follows it
  (start_point...(size - 1)).each do |i|
    self[i] = PositionRange.new(self[i].end, self[i + 1].begin)
  end

  # The last range turns into the trailing gap, if there is one. Comparing
  # against maximum_size itself (not maximum_size - 1) keeps the gap when
  # the last range ends at maximum_size - 1.
  if self[-1].end < maximum_size
    self[-1] = PositionRange.new(self[-1].end, maximum_size)
  else
    delete_at(-1)
  end
  self
end
248
+
249
# Makes sure that there are no non-overlapping borders between
# PositionRanges.
#
# The guaranteed situation after calling this method:
# * Multiple PositionRanges can refer to the same ranges, but if
#   they do they will have the same begin and end position.
# * All positions associated with an object (a Link or an Authorship
#   for example) will still be associated with that same object, but
#   possibly through a different or a new PositionRange.
#
# Example:
#   '3,7->a:5,9->b' lined up will be '3,5->a:5,7->a:5,7->b:7,9->b'
#
# Where the ->X indicates an association with object X
#
# This is used for simplifying PositionRanges for parsing Links into
# Logis.
#
def line_up_overlaps!
  # the list is sorted (and adjacent ranges merged) first; it is sorted
  # again after every split made below
  sort!.merge_adjacents!
  i = 0
  while i < size - 1
    current = self[i]
    following = self[i + 1]
    if current.end > following.begin
      # found an overlap
      split_made = true
      if current.begin != following.begin
        # the beginnings are not lined up: cut current where following starts
        insert(i + 1, current.new_dup(following.begin, current.end))
        self[i] = current.new_dup(current.begin, self[i + 1].begin)
      elsif current.end != following.end
        # the beginnings are lined up already, now cut at the nearest end
        if current.end < following.end
          insert(i + 2, following.new_dup(current.end, following.end))
          self[i + 1] = following.new_dup(following.begin, current.end)
        else
          insert(i + 2, current.new_dup(following.end, current.end))
          self[i] = current.new_dup(current.begin, following.end)
        end
      else
        # identical ranges, nothing to line up
        split_made = false
      end
      if split_made
        # restart from the top in case there is more than one overlap
        sort!
        i = -1
      end
    end
    i += 1
  end
  self
end
299
+
300
# Simplifies the PositionRange::List by merging adjacent PositionRanges,
# that is: neighbouring entries where the first ends exactly where the
# next one begins. The list is modified in place and returned.
#
# Example:
#   1,4:4,7:10,11 => 1,7:10,11
#
# Only merges adjacent PositionRanges if all their attributes
# (except for first and last) are the same
#
# Options
# <tt>:ignore_attributes</tt> => Merges regardless of attributes
#
def merge_adjacents!(options = {})
  ignore_attributes = options[:ignore_attributes]

  # Start at 1: every entry is compared with its predecessor. Starting at
  # 0 would compare the first entry with self[-1], the last one, and could
  # merge the two ends of the list into each other.
  i = 1
  while i < size
    previous = self[i - 1]
    current = self[i]
    if previous.end == current.begin &&
        (ignore_attributes || previous.has_equal_pointer_attributes?(current))
      self[i - 1] = previous.new_dup(previous.begin, current.end)
      # stay on i: the merged entry may touch the next one as well
      delete_at(i)
    else
      i += 1
    end
  end
  self
end
324
+
325
# Translates the PositionRange::List in space, along the given vector:
# every entry is replaced by a new_dup whose first and last are shifted
# by integer. Returns self.
#
# Raises a StandardError when the vector is not an Integer.
#
def translate!(integer)
  unless integer.kind_of?(Integer)
    raise StandardError, 'Tried to translate a PositionRange::List with a non-integer'
  end

  map! { |p_r| p_r.new_dup(p_r.first + integer, p_r.last + integer) }
end
336
+
337
# The ranges_to_insert are inserted at the ranges_at_which_to_insert
# of this list, counted in range_size from its beginning, and
# interluded with ranges_to_skip.
#
# So PositionRange::List.from_s('39,49:16,20').insert_at_ranges!(
#        PositionRange::List.from_s('100,102:6,7'),
#        PositionRange::List.from_s('10,12:19,20'),
#        PositionRange::List.from_s('12,19'))
#
# will result in:
#    PositionRange::List.from_s('39,49:100,102:6,7:16,20')
#
# Raises a StandardError when ranges_to_insert and
# ranges_at_which_to_insert differ in range_size.
#
# NOTE: the ranges in ranges_at_which_to_insert get their action set to
# :ins; the list itself is no longer extended with ranges_to_skip.
#
def insert_at_ranges!(ranges_to_insert, ranges_at_which_to_insert,
    ranges_to_skip = [])
  if ranges_to_insert.range_size != ranges_at_which_to_insert.range_size
    raise StandardError, 'Ranges to insert, and at which to insert are ' +
        'of different range_sizes: ' + ranges_to_insert.to_s + ', ' +
        ranges_at_which_to_insert.to_s
  end

  # mark the insertion points, then walk insertion and skip ranges together
  # in sorted order; concat is done on a copy so that the caller's list
  # does not grow
  ranges_at_which_to_insert.each { |p_r| p_r.action = :ins }
  ranges_to_act = ranges_at_which_to_insert.dup.concat(ranges_to_skip).sort!

  i = -1      # index of the last entry of self that was consumed
  self_p = 0  # position reached, counted in range_size
  ins_p = 0   # index of the next entry of ranges_to_insert
  ranges_to_act.each do |p_r|
    # consume entries of self until the position of p_r is reached
    while self_p < p_r.begin - 1
      i += 1
      self_p += self[i].size
    end
    if self_p > p_r.begin
      # overshot: split the last consumed entry at the position of p_r
      copy = self[i]
      cut = copy.end + p_r.begin - self_p
      self[i] = copy.new_dup(copy.begin, cut)
      self.insert(i + 1, copy.new_dup(cut, copy.end))
      self_p = p_r.begin
    end
    if p_r.action == :ins
      # take as many ranges_to_insert as are needed to fill up p_r
      inner_p = 0
      while inner_p < p_r.size
        self.insert(i + 1, ranges_to_insert[ins_p])
        inner_p += ranges_to_insert[ins_p].size
        i += 1
        ins_p += 1
      end
    end
    self_p += p_r.size
  end
  self
end
387
+
388
+ ### Highlevel methods
389
+
390
# Translates the PositionRange::List into the relative space defined
# by the view_position_range_list.
#
# For every snippet of the view, the part of this list that falls inside
# it is shifted to where the snippet sits in the view (the snippets are
# laid out one after the other, starting at 0). The pieces are collected
# in a new list, on which merge_adjacents! is called; self is not
# modified.
#
def translate_to_view(view_position_range_list)
  view_offset = 0
  relative = PositionRange::List.new
  view_position_range_list.each do |snippet|
    visible = self & PositionRange::List.new([snippet])
    shift = view_offset - snippet.first
    relative.concat(visible.translate!(shift))
    view_offset += snippet.size
  end
  relative.merge_adjacents!
  relative
end
405
+
406
# Translates the PositionRange::List into absolute space.
#
# For every snippet of the view_position_range_list, the part of this
# list that falls inside the stretch of the view taken up by that snippet
# is shifted back to the position of the snippet itself. The pieces are
# collected in a new list, on which merge_adjacents! is called; self is
# not modified.
#
def translate_from_view(view_position_range_list)
  view_offset = 0
  absolute = PositionRange::List.new
  view_position_range_list.each do |snippet|
    window = PositionRange.new(view_offset, view_offset + snippet.size)
    visible = self & PositionRange::List.new([window])
    shift = snippet.first - view_offset
    absolute.concat(visible.translate!(shift))
    view_offset += snippet.size
  end
  absolute.merge_adjacents!
  absolute
end
421
+
422
# Stacks the PositionRanges in the List adjacent in a new
# PositionRange::List, starting at 0, while maintaining their size.
#
# So PositionRange::List.from_s('50,53:11,30').stack_adjacent(:space => 1)
# returns: PositionRange::List.from_s('0,3:4,23')
# (assuming the size of a range is its end minus its begin; without
# :space the second range would start at 3)
#
# NOTE: the new ranges are plain PositionRanges.
#
# Options
# <tt>:space</tt> => The space to leave inbetween, defaults to 0
#
def stack_adjacent(options = {})
  gap = options[:space] || 0
  cursor = 0
  stacked = PositionRange::List.new
  each do |p_r|
    length = p_r.size
    stacked << PositionRange.new(cursor, cursor + length)
    cursor += length + gap
  end
  stacked
end
442
+
443
# Adds all items to a cluster-array, where overlapping PositionRanges are
# added to the same cluster_array position.
#
# So PositionRange::List.from_s('1,2:1,2:10,18:14,18').cluster_overlaps will
# get you a cluster arr equal to the following:
#
#   [PositionRange::List.from_s('1,2:1,2'),
#    PositionRange::List.from_s('10,14'),
#    PositionRange::List.from_s('14,18:14,18')]
#
# Except that the pointer_attributes are of course kept in order
#
# Works on a lined up copy (see line_up_overlaps!), so self is not
# modified. For an empty list a copy of the list itself is returned.
#
def cluster_overlaps
  return dup if empty?

  lined_up = dup.line_up_overlaps!
  clusters = [PositionRange::List.new.push(lined_up.shift)]
  lined_up.each do |p_r|
    if p_r == clusters.last[0]
      # same range as the current cluster: join it
      clusters.last.push(p_r)
    else
      clusters.push(PositionRange::List.new([p_r]))
    end
  end
  clusters
end
471
+
472
# Returns a new string containing only the parts of the old string
# designated by position_ranges.
#
# Appends the string[position_range] in the order in which they are
# found in this list.
#
# Raises a StandardError when a range ends beyond the end of the string.
#
# Options
# <tt>:separator</tt> => The string to insert between the parts
#
def apply_to_string(string, options = {})
  separator = options[:separator] || ''
  parts = map do |p_r|
    raise StandardError, 'End-range bigger than string' if p_r.end > string.size
    string[p_r]
  end
  # join puts the separator between the parts only, so there is no
  # trailing separator to cut off afterwards
  parts.join(separator)
end
492
+
493
+ ### Parsing methods
494
+
495
# Parses a PositionRange::List to a string: the to_s of every
# PositionRange, joined by ':' (an empty string for an empty list).
#
# The ranges are written in list order, not sorted; the previous
# self.sort call here discarded its result and has been removed.
#
def to_s
  map { |p_r| p_r.to_s }.join(':')
end
505
+ end
@@ -0,0 +1,9 @@
1
# Version information of the PositionRange library.
#
# NOTE(review): PositionRange is opened as a module here, while other
# code calls PositionRange.new -- confirm that both definitions can be
# loaded together.
module PositionRange
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 6
    TINY = 0

    # dotted version string, e.g. '0.6.0'
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end