positionrange 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,505 @@
1
+ #--#
2
+ # Copyright: (c) 2006-2008 The LogiLogi Foundation <foundation@logilogi.org>
3
+ #
4
+ # License:
5
+ # This file is part of the PositionRange Library. PositionRange is Free
6
+ # Software. You can run/distribute/modify PositionRange under the terms of
7
+ # the GNU Affero General Public License version 3. The Affero GPL states
8
+ # that running a modified version or a derivative work also requires you to
9
+ # make the sourcecode of that work available to everyone that can interact
10
+ # with it. We chose the Affero GPL to ensure that PositionRange remains open
11
+ # and libre (LICENSE.txt contains the full text of the legally binding
12
+ # license).
13
+ #++#
14
+ #
15
+ # Keeps a list of PositionRanges.
16
+ #
17
+ # Supports basic set operations, as well as many others, like
18
+ # clustering overlaps and getting sizes.
19
+
20
+ class PositionRange::List < Array
21
+
22
+ ###### Regular expressions
23
+
24
# Check-regexps
#
# Matches a complete position-range-list string: nothing at all, or one
# BLOCK_POSITION_RANGE followed by any number of colon-separated ones.
#
# Anchored with \A and \Z rather than ^ and $: the latter anchor at every
# line, which would let a multi-line string through as soon as a single
# line of it (even an empty one) is valid. \Z still tolerates one trailing
# newline, so single-line input is accepted exactly as before.
CHECK_POSITION_RANGE_LIST_RE =
  /\A(#{PositionRange::BLOCK_POSITION_RANGE}(\:#{PositionRange::BLOCK_POSITION_RANGE})*)?\Z/
27
+
28
+ ###### Class methods
29
+
30
# Builds a PositionRange::List out of its string form.
#
# Syntax:
#   <position range string>[:<position range string>]*
#
# A nil (or false) argument gives an empty list.
#
# Options:
#   pass_on_options is handed on, unchanged, to PositionRange.from_s for
#   every range that is parsed out of the string.
#
# Raises a StandardError if the string does not match
# CHECK_POSITION_RANGE_LIST_RE.
#
def self.from_s(position_range_list_string, pass_on_options = {})
  return PositionRange::List.new unless position_range_list_string

  if position_range_list_string !~ CHECK_POSITION_RANGE_LIST_RE
    raise StandardError, 'Invalid position_range_list string given: ' +
      position_range_list_string
  end

  parsed = PositionRange::List.new
  position_range_list_string.split(':').each do |range_string|
    parsed << PositionRange.from_s(range_string, pass_on_options)
  end
  parsed
end
56
+
57
# Returns a new PositionRange::List with a single PositionRange running
# from 0 to string.size, or an empty list when string.size is 0.
#
# Only #size is called on the argument, so an array works as well.
#
def self.new_around(string)
  return PositionRange::List.new unless string.size > 0

  whole = PositionRange.new(0, string.size)
  PositionRange::List.new([whole])
end
67
+
68
+ ###### Methods
69
+
70
+ ### Low level methods
71
+
72
+ # Checking, ranges, etc
73
+
74
# Returns the sum of the sizes of all ranges in this list (0 for an
# empty list).
#
def range_size
  inject(0) { |total, range| total + range.size }
end
83
+
84
# Returns true if none of the PositionRanges in this list refers to
# positions beyond size, false otherwise.
#
# Implemented as a within? check against the single range 0..size, so
# attributes are ignored.
#
def below?(size)
  allowed = PositionRange::List.new([PositionRange.new(0, size)])
  within?(allowed)
end
93
+
94
# Returns true if every PositionRange in this list is completely covered
# by the PositionRanges of the other PositionRange::List, false otherwise.
#
# Works by substracting other from a copy of this list (this list itself
# is left alone) and checking that nothing remains.
#
# Attributes are ignored.
#
def within?(other)
  leftover = dup.substract!(other, :ignore_attributes => true)
  leftover.empty?
end
106
+
107
# Returns the index of the given PositionRange, or nil if it is not in
# this list.
#
# Options
# <tt>:dont_ignore_attributes</tt> => true, only matches an entry that is
#   == to the given range and for which has_equal_pointer_attributes? also
#   holds. Defaults to false, in which case the lookup is left to
#   Array#index.
#
def index(position_range, options = {})
  return super(position_range) unless options[:dont_ignore_attributes]

  find_index do |candidate|
    position_range == candidate &&
      position_range.has_equal_pointer_attributes?(candidate)
  end
end
125
+
126
+ ### Low level operations
127
+
128
# Applies an intersection in the sense of Set theory.
#
# Returns a copy of this list from which all PositionRanges, and parts of
# PositionRanges, that fall outside the PositionRanges of other have been
# removed. Neither this list nor other is changed: an inverted copy of
# other is substracted from a copy of this list, ignoring attributes.
#
# Example:
#   '1,5:7,8:10,12' becomes '2,5:11,12' after limiting to '2,6:11,40'
#
def &(other)
  outside = other.dup.invert!
  dup.substract!(outside, :ignore_attributes => true)
end
140
+
141
# Applies a substraction in the sense of Set theory, without changing
# this list: substract! is run on a copy, and that copy is returned.
#
# See substract!
#
def -(other)
  remainder = dup
  remainder.substract!(other)
end
148
+
149
# Applies a substraction in the sense of Set theory, in place.
#
# It removes all PositionRanges and parts of PositionRanges in this list
# that overlap with the PositionRanges given as the other. The other list
# is not modified; a sorted and merged copy of it is used.
#
# So for example:
#   '1,5:7,9:11,12' becomes '1,4:7,8:11,12' after substracting '4,6:8,9'
#
# Only substracts PositionRanges if all their attributes (except for first
# and last) are the same, unless ignore_attributes is specified.
#
# Options
# <tt>:ignore_attributes</tt> => Ignores attributes
#
# Returns self.
#
def substract!(other, options = {})
  ignore_attributes = options[:ignore_attributes]

  # sorted_self shadows self in sorted order; every change made below is
  # applied to both, so positions in either one stay meaningful.
  sorted_self = self.sort
  if sorted_self.size > 0 && other.size > 0
    # Array#sort returns a plain Array, which has no merge_adjacents!, so
    # the sorted copy is wrapped in a PositionRange::List before merging.
    other = PositionRange::List.new(other.sort).merge_adjacents!

    last_i = 0
    other.each do |p_r|
      # skip the ranges that end before this substracted range begins
      i = last_i
      i += 1 while sorted_self[i] && sorted_self[i].end < p_r.begin
      last_i = i

      # handle every range that starts before the substracted range ends
      while sorted_self[i] && sorted_self[i].begin < p_r.end
        if ignore_attributes || sorted_self[i].has_equal_pointer_attributes?(p_r)
          self_i = index(sorted_self[i], :dont_ignore_attributes => !ignore_attributes)
          if sorted_self[i].begin < p_r.begin
            # keep the part in front of p_r, and queue the rest as a
            # separate range that the next iteration deals with
            copy = sorted_self[i].dup
            sorted_self[i] = copy.new_dup(copy.begin, p_r.begin)
            self[self_i] = sorted_self[i]
            sorted_self.insert(i + 1, copy.new_dup(p_r.begin, copy.end))
            insert(self_i + 1, sorted_self[i + 1])
            i += 1
          elsif sorted_self[i].end <= p_r.end
            # completely covered by p_r
            sorted_self.delete_at(i)
            delete_at(self_i)
          else
            # only the part behind p_r remains
            sorted_self[i] = sorted_self[i].new_dup(
              p_r.end, sorted_self[i].end)
            self[self_i] = sorted_self[i]
          end
        else
          i += 1
        end
      end
    end
  end
  self
end
203
+
204
# Deletes the given position_range from this list, by substracting a
# one-element PositionRange::List holding it (see substract!).
#
def delete(p_r)
  to_remove = PositionRange::List.new([p_r])
  substract!(to_remove)
end
209
+
210
# Inverts this list in place: all positions that were included become
# excluded, and all positions that were excluded become included, up to
# maximum_size.
#
# An empty list becomes the single range 0..maximum_size (or stays empty
# if maximum_size is not positive).
#
# NOTE: new ranges are created as PositionRanges, so references to
# objects or ordering_positions of subclasses are not maintained, as
# they are meaningless for inverted lists of ranges.
#
# NOTE: as a side effect a non-empty list is sorted and has its adjacent
# ranges merged before it is inverted.
#
# Raises a PositionRange::Error if the last range (after sorting) ends
# beyond maximum_size.
#
# Returns self.
#
def invert!(maximum_size = PositionRange::MaximumSize)
  if size > 0
    # sorts and prevents problems with adjacent ranges
    sort!.merge_adjacents!
    if self[-1].end > maximum_size
      raise PositionRange::Error.new(self[-1].begin, self[-1].end),
        'PositionRange larger than the maximum'
    end

    # the gap in front of the first range, if there is one
    start_point = 0
    if self[0].begin > 0
      insert(0, PositionRange.new(0, self[0].begin))
      start_point += 1
    end

    # every range but the last is replaced by the gap that follows it
    if size > 1
      (start_point...(size - 1)).each do |i|
        self[i] = PositionRange.new(self[i].end, self[i + 1].begin)
      end
    end

    # The last range becomes the gap up to maximum_size. Any non-empty gap
    # counts, including one of size 1 (end == maximum_size - 1).
    if self[-1].end < maximum_size
      self[-1] = PositionRange.new(self[-1].end, maximum_size)
    else
      delete_at(-1)
    end
  elsif maximum_size > 0
    push(PositionRange.new(0, maximum_size))
  end
  self
end
248
+
249
# Splits overlapping PositionRanges until every pair of ranges that
# overlaps has exactly the same begin and end.
#
# The guaranteed situation after calling this method:
# * Multiple PositionRanges can refer to the same ranges, but if
#   they do they will have the same begin and end position.
# * All positions associated with an object (a Link or an Authorship
#   for example) will still be associated with that same object, but
#   possibly through a different or a new PositionRange.
#
# Example:
#   '3,7->a:5,9->b' lined up will be '3,5->a:5,7->a:5,7->b:7,9->b'
#
# Where the ->X indicates an association with object X
#
# This is used for simplifying PositionRanges for parsing Links into
# Logis.
#
# The list is sorted and has its adjacent ranges merged first. Returns
# self.
#
def line_up_overlaps!
  sort!.merge_adjacents!
  # The scan below compares neighbours only; after every split the list
  # is sorted again and the scan starts over from the front.
  idx = 0
  while idx < (size - 1)
    current = self[idx]
    following = self[idx + 1]
    if current.end > following.begin
      # found an overlap
      if current.begin != following.begin
        # the beginnings are not lined up: cut current in two at the
        # beginning of following
        insert(idx + 1, current.new_dup(following.begin, current.end))
        self[idx] = current.new_dup(current.begin, following.begin)
        idx = -1 # restart in case more than 1 overlap
        sort!
      elsif current.end != following.end
        # the beginnings are already lined up, now do the ends
        if current.end < following.end
          # current is the shortest, so following is cut at current.end
          insert(idx + 2, following.new_dup(current.end, following.end))
          self[idx + 1] = following.new_dup(following.begin, current.end)
        else
          # following is the shortest, so current is cut at following.end
          insert(idx + 2, current.new_dup(following.end, current.end))
          self[idx] = current.new_dup(current.begin, following.end)
        end
        idx = -1 # restart in case more than 1 overlap
        sort!
      end
    end
    idx += 1
  end
  self
end
299
+
300
# Simplifies the PositionRange::List in place by merging PositionRanges
# that directly follow each other in the list and where the first ends
# exactly where the second begins.
#
# Example:
#   1,4:4,7:10,11 => 1,7:10,11
#
# Only merges adjacent PositionRanges if all their attributes
# (except for first and last) are the same, unless ignore_attributes is
# specified.
#
# Options
# <tt>:ignore_attributes</tt> => Ignores attributes
#
# Returns self.
#
def merge_adjacents!(options = {})
  ignore_attributes = options[:ignore_attributes]

  # Starts at 1: every entry is compared with the one in front of it.
  # Starting at 0 would compare the first entry with self[-1], the last
  # one, and could merge the two ends of the list into each other.
  i = 1
  while i < size
    previous = self[i - 1]
    current = self[i]
    if previous.end == current.begin &&
       (ignore_attributes || previous.has_equal_pointer_attributes?(current))
      self[i - 1] = previous.new_dup(previous.begin, current.end)
      # i is not advanced: the merged range may touch the next one as well
      delete_at(i)
    else
      i += 1
    end
  end
  self
end
324
+
325
# Translates the PositionRange::List in space, in place: every range is
# replaced by a new_dup of itself whose first and last are both moved by
# the given integer.
#
# Raises a StandardError if the argument is not an Integer.
#
# Returns self.
#
def translate!(integer)
  unless integer.kind_of?(Integer)
    raise StandardError, 'Tried to translate a PositionRange::List with a non-integer'
  end

  each_index do |position|
    range = self[position]
    self[position] = range.new_dup(range.first + integer, range.last + integer)
  end
  self
end
336
+
337
# The ranges_to_insert are inserted at the ranges_at_which_to_insert
# of this list, counted in range_size from its beginning, and
# interspersed with ranges_to_skip.
#
# So PositionRange::List.from_s('39,49:16,20').insert_at_ranges!(
#      PositionRange::List.from_s('100,102:6,7'),
#      PositionRange::List.from_s('10,12:19,20'),
#      PositionRange::List.from_s('12,19'))
#
# will result in:
#   PositionRange::List.from_s('39,49:100,102:6,7:16,20')
#
# The ranges in ranges_at_which_to_insert get their action set to :ins.
# Apart from that the argument lists are left as they were passed in.
#
# Raises a StandardError if ranges_to_insert and ranges_at_which_to_insert
# differ in range_size.
#
# Returns self.
#
def insert_at_ranges!(ranges_to_insert, ranges_at_which_to_insert,
                      ranges_to_skip = [])
  if ranges_to_insert.range_size != ranges_at_which_to_insert.range_size
    raise StandardError, 'Ranges to insert, and at which to insert are ' +
      'of different range_sizes: ' + ranges_to_insert.to_s + ', ' +
      ranges_at_which_to_insert.to_s
  end

  # Tag the insertion points, then combine them with the ranges to skip on
  # a copy, so that the caller's list is neither extended nor reordered.
  ranges_at_which_to_insert.each { |p_r| p_r.action = :ins }
  ranges_to_act = ranges_at_which_to_insert.dup.concat(ranges_to_skip).sort!

  i = -1     # index of the last range of self that was passed
  self_p = 0 # position reached so far, counted in range_size
  ins_p = 0  # index of the next range of ranges_to_insert to place
  ranges_to_act.each do |p_r|
    # NOTE(review): the loop stops one position short of p_r.begin; it
    # looks like this is relied upon by the cut below -- confirm before
    # changing the bound.
    while self_p < p_r.begin - 1
      i += 1
      self_p += self[i].size
    end
    if self_p > p_r.begin
      # went past the target position: cut the last range in two there
      copy = self[i]
      cut = copy.end + p_r.begin - self_p
      self[i] = copy.new_dup(copy.begin, cut)
      insert(i + 1, copy.new_dup(cut, copy.end))
      self_p = p_r.begin
    end
    if p_r.action == :ins
      # place ranges to insert until the size of p_r has been filled
      inner_p = 0
      while inner_p < p_r.size
        insert(i + 1, ranges_to_insert[ins_p])
        inner_p += ranges_to_insert[ins_p].size
        i += 1
        ins_p += 1
      end
    end
    self_p += p_r.size
  end
  self
end
387
+
388
+ ### Highlevel methods
389
+
390
# Translates the PositionRange::List into the relative space defined
# by the view_position_range_list.
#
# For every range of the view, in order, the part of this list that
# falls within it is taken and shifted so that the view range starts
# where the previous one ended (the first one at 0). The shifted parts
# are collected in a new list, whose adjacent ranges are merged at the
# end. This list itself is not changed.
#
def translate_to_view(view_position_range_list)
  result = PositionRange::List.new
  offset = 0
  view_position_range_list.each do |snippet|
    visible = self & PositionRange::List.new([snippet])
    shift = offset - snippet.first
    result.concat(visible.translate!(shift))
    offset += snippet.size
  end
  result.merge_adjacents!
  result
end
405
+
406
# Translates the PositionRange::List from the relative space defined by
# the view_position_range_list back into absolute space.
#
# For every range of the view, in order, the part of this list that
# falls within the stretch of relative space taken up by that view range
# is shifted to where the view range itself starts. The shifted parts are
# collected in a new list, whose adjacent ranges are merged at the end.
# This list itself is not changed.
#
def translate_from_view(view_position_range_list)
  result = PositionRange::List.new
  offset = 0
  view_position_range_list.each do |snippet|
    window = PositionRange::List.new(
      [PositionRange.new(offset, offset + snippet.size)])
    visible = self & window
    shift = snippet.first - offset
    result.concat(visible.translate!(shift))
    offset += snippet.size
  end
  result.merge_adjacents!
  result
end
421
+
422
# Returns a new PositionRange::List in which the PositionRanges of this
# list are stacked one after the other starting at 0, each keeping its
# size.
#
# So PositionRange::List.from_s('50,53:11,30').stack_adjacent(:space => 1)
# returns: PositionRange::List.from_s('0,3:4,23')
#
# NOTE: the new ranges are plain PositionRanges built from the sizes only.
#
# Options
# <tt>:space</tt> => The space to leave inbetween (defaults to 0)
#
def stack_adjacent(options = {})
  gap = options[:space] || 0
  stacked = PositionRange::List.new
  cursor = 0
  each do |range|
    length = range.size
    stacked.push(PositionRange.new(cursor, cursor + length))
    cursor += length + gap
  end
  stacked
end
442
+
443
# Returns an array of clusters, each a PositionRange::List holding the
# PositionRanges that are == to each other once a copy of this list has
# had its overlaps lined up (see line_up_overlaps!). This list itself is
# not changed.
#
# So PositionRange::List.from_s('1,2:1,2:10,18:14,18').cluster_overlaps will
# get you a cluster arr equal to the following:
#
#   [PositionRange::List.from_s('1,2:1,2'),
#    PositionRange::List.from_s('10,14'),
#    PositionRange::List.from_s('14,18:14,18')]
#
# Except that the pointer_attributes are of course kept in order
#
# For an empty list a copy of that (empty) list is returned.
#
def cluster_overlaps
  return dup if empty?

  lined_up = dup.line_up_overlaps!
  clusters = [PositionRange::List.new.push(lined_up.shift)]
  lined_up.each do |range|
    current = clusters.last
    if range == current[0]
      current.push(range)
    else
      clusters.push(PositionRange::List.new([range]))
    end
  end
  clusters
end
471
+
472
# Returns a new string made up of only those parts of the given string
# that are designated by the PositionRanges in this list.
#
# The parts (string[position_range]) are appended in the order in which
# their ranges are found in this list.
#
# Raises a StandardError if a range ends beyond the end of the string.
#
# Options
# <tt>:separator</tt> => The string to insert between the parts
#   (defaults to the empty string)
#
def apply_to_string(string, options = {})
  separator = options[:separator] || ''
  assembled = inject('') do |built, range|
    if range.end > string.size
      raise StandardError, 'End-range bigger than string'
    end
    built + (string[range] + separator)
  end
  # every part got a separator appended, so cut off the last one
  assembled[0..-1 - separator.size]
end
492
+
493
+ ### Parsing methods
494
+
495
# Returns the string form of this PositionRange::List: the to_s of every
# PositionRange, in list order, joined by colons. An empty list gives an
# empty string.
#
# The list is deliberately not sorted here: the order of the ranges is
# part of what the string represents.
#
def to_s
  map { |p_r| p_r.to_s }.join(':')
end
505
+ end
@@ -0,0 +1,9 @@
1
# NOTE(review): PositionRange is opened as a module here, while the list
# code calls PositionRange.new -- confirm that both definitions can be
# loaded together.
module PositionRange
  # The version of the library, as separate numbers and as one
  # dot-separated string.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 6
    TINY  = 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end