plain_text 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -81,6 +81,7 @@ module PlainText
81
81
  even_num: 'even number of elements must be specified.',
82
82
  use_to_a: 'To handle it as an Array, use to_a first.',
83
83
  }
84
+ private_constant :ERR_MSGS
84
85
 
85
86
  # @param arin [Array] of [Paragraph1, Boundary1, Para2, Bd2, ...] or Part/Paragraph if boundaries is given
86
87
  # @param boundaries [Array] of Boundary
@@ -283,6 +284,108 @@ module PlainText
283
284
  map_part_core(with_index: false, **kwd, &bl)
284
285
  end
285
286
 
287
# Merges neighbouring Parts/Paragraphs for which the given block returns true.
#
# Every Part/Paragraph that still has a following Part/Paragraph is offered
# to the block together with its neighbour, the Boundary between them and
# the Boundaries just before and just after the pair.
# Nothing is modified until all the pairs have been examined.
#
# @yield [ary, b1, b2, i] Decides whether the pair should be merged.
# @yieldparam [Array] ary of [Para1st, BoundaryBetween, Para2nd]
# @yieldparam [Boundary] b1 Boundary before the first Part/Paragraph (nil for the very first one)
# @yieldparam [Boundary] b2 Boundary after the second Part/Paragraph
# @yieldparam [Integer] i Index of the first Para
# @yieldreturn [Boolean, Symbol] Truthy to merge the pair. :abort cancels the whole operation.
# @return [self, false] false if the block returned :abort or no pair was selected, else self.
def merge_para_if
  doomed = []  # Indices (both Paras and Boundaries) that take part in a merge.
  each_index do |idx|
    break unless idx < size - 3  # From the 2nd last paragraph on, there is no partner left.
    next unless index_para?(idx, skip_check: true)

    triple    = self.to_a[idx..idx+2]
    bd_before = (idx.zero? ? nil : self[idx-1])
    verdict   = yield(triple, bd_before, self[idx+3], idx)
    return false if verdict == :abort
    doomed.push(idx, idx+1, idx+2) if verdict
  end

  return false if doomed.empty?
  doomed.uniq!  # Chained pairs share their middle Para index.

  # Merge from the tail so that the indices of the earlier ranges stay valid.
  arind2ranges(doomed).reverse_each { |rng| merge_para!(rng) }
  self
end
319
+
320
# Merges multiple consecutive paragraphs into one (destructively).
#
# The boundaries between them are simply joined as String as they are.
# The Boundary that follows the last merged Part/Paragraph is kept as it is.
#
# @overload merge_para!(index1, index2, *rest)
#   With a list of consecutive indices. Unless use_para_index is true, they are the main Array indices. Namely, if Part is [P0, B0, P1, B1, P2, B2] and if you want to merge P1 and P2, specify (2,3,4). If use_para_index is true, specify (1,2).
#   @param index1 [Integer] the first index to merge
#   @param index2 [Integer] the second index to merge, and so on...
# @overload merge_para!(range)
#   With a range of the indices to merge. Unless use_para_index is true, this means the main Array index. See the first overload about it.
#   @param range [Range] Range of the indices to merge
# @param use_para_index: [Boolean] If false (Default), the indices are for the main indices (alternating between Parts/Paragraphs and Boundaries, starting from Part/Paragraph). If true, the indices are as obtained with {#parts}, namely the array containing only Parts/Paragraphs.
# @return [self] self is returned unchanged if there is nothing to merge.
# @raise [ArgumentError] (from the index builder) if the indices are not consecutive or if the first one is not for a Part/Paragraph.
def merge_para!(*rest, use_para_index: false)
  ranchk = build_index_range_for_merge_para!(*rest, use_para_index: use_para_index)
  return self unless ranchk  # Nothing to merge.

  # ranchk spans [Para, Boundary, ..., Para, Boundary].
  # All but the last Boundary are joined into one; the result is 2 elements (Para, Boundary).
  self[ranchk] = [self[ranchk].to_a[0..-2].join, self[ranchk.end]]
  self
end
342
+
343
# Builds the Range of the main-Array indices that a merge operates on.
#
# The returned Range starts at a Part/Paragraph index and covers an even
# number of indices, e.g., (2..5): Para(2)/Boundary(3)/Para(4) are to be
# merged, whereas the final Boundary(5) is to be left as it is.
#
# @param (see #merge_para!)
# @param use_para_index: [Boolean] If true, the given indices count Parts/Paragraphs only.
# @return [Range, nil] nil if no range is selected.
# @raise [ArgumentError] if the indices are not consecutive or if the first one is not for a Part/Paragraph.
def build_index_range_for_merge_para!(*rest, use_para_index: false)
  requested = rest.flatten
  return nil if requested.empty?

  # Expected to be an Array of positive indices (sorted and uniq-ed), or nil.
  ary = to_ary_positive_index(requested, to_a)
  return nil unless ary
  return nil if ary.empty?

  # Normalize so that ary is [Para1-index, Boundary1-index, ..., Pn-index, Bn-index],
  # i.e., an even number of elements, ending with a Boundary.
  if use_para_index
    ary = ary.flat_map { |i| [i*2, i*2+1] }
  elsif index_para?(ary[-1], skip_check: true)
    # The last given index was for a Paragraph (likely case).
    ary.push(ary[-1]+1)
  end

  ary.each_cons(2) do |prev, cur|
    raise(ArgumentError, "Given (Paragraph) indices are not consecutive.") unless cur == prev+1
  end

  # NOTE(review): $myd looks like a leftover debugging flag -- confirm before removing.
  $myd = false
  raise ArgumentError, "The first index (#{ary[0]}) is not for Paragraph." if !index_para?(ary[0], skip_check: true)

  i_end = [ary[-1], size-1].min
  return if i_end - ary[0] < 3  # No more than 1 para selected.

  (ary[0]..ary[-1])
end
387
+ private :build_index_range_for_merge_para!
388
+
286
389
  # Normalize the content, making sure it has an even number of elements
287
390
  #
288
391
  # The even and odd number elements are, if bare Strings or Array, converted into
@@ -46,6 +46,42 @@ module PlainText
46
46
  adjust_last_element(arret) # => Array
47
47
  end
48
48
 
49
# The class-method version of the instance method of the same name.
#
# One more parameter (input String) is required to specify.
#
# The count is derived from the size of the Array returned by
# split_with_delimiter (presumably [Str, Delimiter, Str, Delimiter, ...]).
#
# @param instr [String] String that is examined.
# @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
# @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
# @param with_if_end: [Boolean] if true, returns a 2-element Array (see the return).
# @return [Integer, Array<Integer, Boolean>] Non-negative count (0 for an empty String).
#   If with_if_end is true, [plain count, true if the line-number-like count would be the same]
#   ([0, true] for an empty String).
# @see PlainText::Split#count_regexp
def self.count_regexp(instr, re_in, like_linenum: false, with_if_end: false)
  return (with_if_end ? [0, true] : 0) if instr.empty?

  n_pieces  = split_with_delimiter(instr, re_in).size
  n_matches = n_pieces.div(2)
  return n_matches unless like_linenum || with_if_end

  n_lines = (n_pieces + 1).div(2)  # Rounded up: a trailing piece without a delimiter counts as a line.
  with_if_end ? [n_matches, (n_matches == n_lines)] : n_lines
end
69
+
70
# The class-method version of the instance method of the same name.
#
# One more parameter (input String) is required to specify.
# A trailing linebreak does not start a new line.
#
# @param instr [String] String that is examined.
# @param linebreak: [String] +\n+ etc (Default: $/).
# @return [Integer] Number of lines; 0 for an empty String.
# @see #count_lines
def self.count_lines(instr, linebreak: $/)
  return 0 if instr.empty?

  pieces = instr.split(linebreak, -1)  # -1 preserves the empty piece(s) after the last linebreak(s).
  (pieces.last == "") ? pieces.size - 1 : pieces.size
end
84
+
49
85
  ####################################################
50
86
  # Class methods (Private)
51
87
  ####################################################
@@ -93,6 +129,44 @@ module PlainText
93
129
  def split_with_delimiter(*rest)
94
130
  PlainText::Split.public_send(__method__, self, *rest)
95
131
  end
132
+
133
+ # Count the number of matches to self that satisfy the given Regexp
134
+ #
135
+ # If like_linenum option is specified, it is counted like the number of
136
+ # lines, namely the returned value is incremented from the number of
137
+ # matches by 1 unless the very last characters of the String is
138
+ # the last match.
139
+ # For example, if no matches are found, this still returns one.
140
+ #
141
+ # Note if the String (self) is empty, this always returns 0.
142
+ #
143
+ # The special option is +with_if_end+. If given true,
144
+ # this returns Array<Integer, Boolean> instead of a simple Integer,
145
+ # with the first parameter being the Integer of the count as with
146
+ # the default like_linenum=false, and the second parameter gives
147
+ # true if the number is the same even if it was like_linenum=true,
148
+ # namely if the end of the String coincides with the last match,
149
+ # else false.
150
+ # (This parameter is introduced just to reduce the overhead of
151
+ # potentially calling this routine twice or user's making their own check.)
152
+ #
153
+ # @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
154
+ # @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
155
+ # @param with_if_end: [Boolean] a special case (see the description).
156
+ # @return [Integer, Array<Integer, Boolean>] non-negative count (0 if self is empty); an Array only if with_if_end is true
157
+ # @see PlainText::Split#count_regexp
158
def count_regexp(*rest, **kwd)
  # Delegates to the module-level method of the same name, with self as the examined String.
  PlainText::Split.count_regexp(self, *rest, **kwd)
end
161
+
162
# Returns the number of lines of self.
#
# @param linebreak: [String] +\n+ etc (Default: $/).
# @return [Integer] Number of lines; 0 if self is empty.
# @see PlainText::Split.count_lines
def count_lines(**kwd)
  # Delegates to the module-level method of the same name, with self as the examined String.
  PlainText::Split.count_lines(self, **kwd)
end
96
170
  end # module Split
97
171
  end # module PlainText
98
172
 
@@ -11,6 +11,32 @@ module PlainText
11
11
  # All methods in this Module are module functions.
12
12
  module_function
13
13
 
14
+ private
15
+
16
# Groups an Array of Integers into Ranges of consecutive runs.
#
# A new Range starts whenever an element is larger than its predecessor
# by more than 1.
#
# @example
#   arind2ranges [1,2,3,6,7,9]
#    # => [(1..3), (6..7), (9..9)]
#
# @param arin [Array<Integer>] must not be empty
# @return [Array<Range>]
# @raise [RuntimeError] if arin is empty
def arind2ranges(arin)
  raise("arin should not be empty.") unless arin[0]

  arin.slice_when { |prev, cur| cur > prev + 1 }
      .map { |run| (run.first..run.last) }
end
38
+ private :arind2ranges
39
+
14
40
  # Returns a pair of Arrays of even and odd number-indices of the original Array
15
41
  #
16
42
  # @example
@@ -36,14 +62,15 @@ module PlainText
36
62
  # If the negative index is out of range, it returns nil.
37
63
  #
38
64
  # @param i [Integer]
39
- # @param ary [Array] Reference Array.
65
+ # @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
40
66
  # @return [Integer, NilClass] nil if out of range to the negative. Note in most cases in Ruby default, it raises IndexError. See the code of {#positive_array_index_checked}
41
67
  # @raise [TypeError] if non-integer is specified.
42
- # @raise [ArgumentError] if ary is not an Array, or more specifically, it does not have size method or ary.size does not return Integer or similar.
43
# Converts a (potentially negative) Array index into a non-negative one.
#
# The reference (ary) is consulted only when the index is negative.
#
# @param i [Integer] Index; anything that can be converted with to_int.
# @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
# @return [Integer, NilClass] nil if the negative index is out of range.
# @raise [TypeError] if i cannot be converted into Integer.
# @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
def positive_array_index(i, ary=nil)
  i2 =
    begin
      i.to_int
    rescue StandardError
      # The message is built by interpolation only (no sprintf), so that "%" in the inspected values is harmless.
      raise TypeError, "no implicit conversion of #{i.class} into Integer (i=#{i.inspect},ary=#{ary.inspect})"
    end
  return i2 if i2 >= 0

  begin
    arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
    ret = arysize + i2
  rescue NoMethodError, TypeError
    raise ArgumentError, "Reference is neither an Array nor Integer."
  end
  (ret < 0) ? nil : ret
end
49
76
 
@@ -55,22 +82,24 @@ module PlainText
55
82
  # Wrapper for {#positive_array_index}
56
83
  #
57
84
  # @param index_in [Integer] Index to check and convert from. Potentially negative integer.
58
- # @param ary [Array] Reference Array.
85
+ # @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
59
86
  # @param accept_too_big: [Boolean, NilClass] if true (Default), a positive index larger than the last array index is returned as it is. If nil, the last index + 1 is accepted but raises an Exception for anything larger. If false, any index larger than the last index raises an Exception.
60
87
  # @param varname: [NilClass, String] Name of the variable (or nil) to be used for error messages.
61
88
  # @return [Integer] Non-negative index; i.e., if index=-1 is specified for an Array with a size of 3, the returned value is 2 (the last index of it).
62
89
  # @raise [IndexError] if the index is out of the range to negative.
63
- def positive_array_index_checked(index_in, ary, accept_too_big: true, varname: nil)
90
+ # @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
91
+ def positive_array_index_checked(index_in, ary=nil, accept_too_big: true, varname: nil)
64
92
  # def self.positive_valid_index_for_array(index_in, ary, varname: nil)
65
93
  errmsgs = {}
66
94
  %w(of for).each do |i|
67
95
  errmsgs[i] = (varname ? "." : sprintf(" %s %s.", i, varname))
68
96
  end
69
97
 
70
- index = positive_array_index(index_in, ary) # guaranteed to be Integer or nil
71
- raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, ary.size) if !index # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).
98
+ arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
99
+ index = positive_array_index(index_in, arysize) # guaranteed to be Integer or nil (or ArgumentError)
100
+ raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, arysize) if !index # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).
72
101
  if index_in >= 0
73
- last_index = ary.size - 1
102
+ last_index = arysize - 1
74
103
  errnote1 = nil
75
104
  if (index > last_index + 1) && !accept_too_big
76
105
  errnote1 = ' (or +1)'
@@ -82,6 +111,38 @@ module PlainText
82
111
  index
83
112
  end
84
113
 
114
# Converts Array or Range to an Array with positive indices.
#
# An endless Range (its end is nil or Float::INFINITY) runs up to the last
# index of the reference, regardless of whether the Range excludes its end.
# A beginless Range starts from 0.
#
# @param from [Array, Range]
# @param arref [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size).
# @param flatten: [Boolean] If true (Default), if elements are Range, they are unfolded. If false and if an Array containing a Range, Exception is raised.
# @param sortuniq: [Boolean] If true (Default), the return is sorted and uniq-ed.
# @param kwd [Hash] passed on to {#positive_array_index_checked}
# @return [Array, nil] nil if the size of the reference is smaller than 1.
# @raise [TypeError] if non-integer is specified.
# @raise [ArgumentError] if arref is neither an Array nor Integer, or more specifically, it does not have size method or arref.size does not return Integer or similar.
def to_ary_positive_index(from, arref, flatten: true, sortuniq: true, **kwd)
  arrefsize = (arref ? (arref.respond_to?(:to_int) ? arref.to_int : arref.size) : size)
  return nil if arrefsize < 1

  if flatten
    # Every Range element is unfolded into its (positive) indices.
    from = [from].flatten.flat_map { |ec|
      like_range?(ec) ? to_ary_positive_index(ec, arrefsize, flatten: false, sortuniq: sortuniq, **kwd) : ec
    }
  end

  if like_range?(from)
    r_beg = from.begin
    r_end = from.end
    i_beg = (r_beg.nil? ? 0 : positive_array_index_checked(r_beg, arrefsize, **kwd))
    # Endless Range: exclude_end? is irrelevant, as in Array#[] (e.g., ary[2...]).
    return (i_beg..(arrefsize-1)).to_a if r_end.nil? || r_end == Float::INFINITY

    i_end = positive_array_index_checked(r_end, arrefsize, **kwd)
    return (from.exclude_end? ? (i_beg...i_end) : (i_beg..i_end)).to_a
  end

  ret = from.map { |i| positive_array_index_checked(i, arrefsize, **kwd) }
  (sortuniq ? ret.sort.uniq : ret)
end
138
+
139
# Duck-typing check for a Range-like object.
#
# @param obj [Object]
# @return [Boolean] true if obj responds to exclude_end?
def like_range?(obj)
  obj.respond_to?(:exclude_end?)
end
145
+
85
146
  # Raise TypeError
86
147
  #
87
148
  # Call as +raise_typeerror(var_name)+ from instance methods,
data/lib/plain_text.rb CHANGED
@@ -591,7 +591,7 @@ module PlainText
591
591
  # @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
592
592
  # @param linebreak: [String] +\n+ etc (Default: +$/+), used when +unit==:line+ (Default)
593
593
  # @return [String] as self
594
- def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
594
+ def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
595
595
  if num_in.class.method_defined? :to_int
596
596
  num = num_in.to_int
597
597
  raise ArgumentError, "Non-positive num (#{num_in}) is given in #{__method__}" if num.to_int < 1
@@ -604,10 +604,10 @@ module PlainText
604
604
  case unit
605
605
  when :line, "-n"
606
606
  # Regexp (for boundary)
607
- return head_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
607
+ return head_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
608
608
 
609
609
  # Integer (a number of lines)
610
- ret = split(linebreak)[0..(num-1)].join(linebreak)
610
+ ret = split(linebreak, -1)[0..(num-1)].join(linebreak) # -1 is specified to preserve the last linebreak(s).
611
611
  return ret if size <= ret.size # Specified line is larger than the original or the last NL is missing.
612
612
  return(ret << linebreak) # NL is added to the tail as in the original.
613
613
  when :char
@@ -634,7 +634,7 @@ module PlainText
634
634
  # @return same as self
635
635
  def head_inverse(*rest, **key)
636
636
  s2 = head(*rest, **key)
637
- (s2.size >= size) ? '' : self[s2.size..-1]
637
+ (s2.size >= size) ? self[0,0] : self[s2.size..-1]
638
638
  end
639
639
 
640
640
  # Normalizes line-breaks
@@ -779,7 +779,8 @@ module PlainText
779
779
  # @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
780
780
  # @param linebreak: [String] +\n+ etc (Default: +$/+), used when unit==:line (Default)
781
781
  # @return [String] as self
782
- def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
782
+ def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
783
+
783
784
  if num_in.class.method_defined? :to_int
784
785
  num = num_in.to_int
785
786
  raise ArgumentError, "num of zero is given in #{__method__}" if num == 0
@@ -793,7 +794,7 @@ module PlainText
793
794
  case unit
794
795
  when :line, '-n'
795
796
  # Regexp (for boundary)
796
- return tail_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
797
+ return tail_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
797
798
 
798
799
  # Integer (a number of lines)
799
800
  return tail_linenum(num_in, num, linebreak: linebreak)
@@ -822,7 +823,7 @@ module PlainText
822
823
  # @return same as self
823
824
  def tail_inverse(*rest, **key)
824
825
  s2 = tail(*rest, **key)
825
- (s2.size >= size) ? '' : self[0..(size-s2.size-1)]
826
+ (s2.size >= size) ? self[0,0] : self[0..(size-s2.size-1)]
826
827
  end
827
828
 
828
829
 
@@ -832,25 +833,71 @@ module PlainText
832
833
 
833
834
  # head command with Regexp
834
835
  #
836
+ # @todo Improve the algorithm like {#tail_regexp}
837
+ #
835
838
  # @param re_in [Regexp] Regexp to determine the boundary.
836
839
  # @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
840
+ # @param padding: [Integer] Number of lines added to (if positive) or removed from (if negative) the lines to return.
837
841
  # @param linebreak: [String] +\n+ etc (Default: $/).
838
842
  # @return [String] as self
839
843
  # @see #head
840
# (head with Regexp; padding: is the number of lines added (positive) or
# removed (negative) at the tail of the otherwise returned lines.)
def head_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
  return self if empty?

  mat = re_in.match self
  return self if !mat

  if inclusive
    postmat = post_match_in_line(mat, linebreak: linebreak)
    # postmat is nil if the non-empty remainder holds no linebreak, namely,
    # the match is on the last (unterminated) line. The whole remainder
    # then belongs to the matched line and must be included.
    strmain = mat.pre_match + mat[0] + (postmat ? postmat[0] : mat.post_match)
  else
    strmain = pre_match_in_line(mat.pre_match, linebreak: linebreak).pre_match
  end
  return strmain if padding == 0

  ## Adds padding

  lines_main_deli, nlines_main = split_lines_with_nlines(strmain, linebreak: linebreak)
  n_lines2ret = nlines_main + padding

  # padding is too negatively large and hence no lines are left.
  # [0,0] instead of "" is used to preserve its class (in case it is a child class of String).
  return self[0,0] if n_lines2ret <= 0

  # lines_*_deli is [Line, NL, Line, NL, ...]; hence 2 elements per line.
  return lines_main_deli[0..(n_lines2ret*2-1)].join if padding < 0

  # positive padding
  lines_self_deli, nlines_self = split_lines_with_nlines(linebreak: linebreak)
  return self if n_lines2ret > nlines_self
  lines_self_deli[0..(n_lines2ret*2-1)].join
end
849
870
  private :head_regexp
850
871
 
872
# Returns Array of [Line,NL,Line,NL, ...] and the number of lines
#
# Wrapper of {PlainText::Split::split_with_delimiter}(str, linebreak)
#
# See {PlainText::Split::count_lines} (and {PlainText::Split#count_lines})
#
# @param str [String]
# @param linebreak: [String] +\n+ etc (Default: $/).
# @return [Array<Array, Integer>] [split Array, number of lines]
def split_lines_with_nlines(str=self, linebreak: $/)
  arline = PlainText::Split::split_with_delimiter(str, linebreak)

  # This is the NUMBER of the lines (NOT index).
  # An odd size means there is no linebreak at the tail, and so it is rounded up.
  # Integer division keeps it an Integer (Integer#quo would return a Rational).
  nlines = (arline.size + 1).div(2)
  [arline, nlines]
end
889
+ private :split_lines_with_nlines
890
+
851
891
 
852
892
  # Returns MatchData of the String at and before the first linebreak before the MatchData (inclusive)
853
893
  #
894
+ # Basically this returns String after the last linebreak of the input
895
+ #
896
+ # @example
897
+ # pre_match_in_line("1\n2\n__abc") # => #<MatchData "__abc"> pre_match=="1\n2\n"
898
+ # pre_match_in_line("1\n2\n") # => #<MatchData "" > pre_match=="1\n2\n"
899
+ # pre_match_in_line( "__abc") # => #<MatchData "__abc"> pre_match==" "
900
+ #
854
901
  # @param strpre [String] String of prematch of the last MatchData
855
902
  # @param linebreak: [String] +\n+ etc (Default: $/)
856
903
  # @return [MatchData] m[0] is the string after the last linebreak before the matched data (exclusive) and m.pre_match is all the lines before that.
@@ -861,46 +908,124 @@ module PlainText
861
908
  end
862
909
  private :pre_match_in_line
863
910
 
911
# Gets the line index numbers of the first and last lines of the matched string
#
# It is the Ruby index number as used in the each_line method.
#
# Matched String can span for multi-lines.
#
# Note if matched string is empty, it still is treated as significant.
#
# @param mat [MatchData, String] If String, it is User's (last) matched String.
# @param strpre [String, nil] Pre-match from the beginning of self to the matched string, if mat is String. Ignored if mat is MatchData.
# @param linebreak: [String] +\n+ etc (Default: $/)
# @return [Hash<Symbol, Integer, nil>] 2 keys: :first_matched and :last_matched
def _matched_line_indices(mat, strpre=nil, linebreak: $/)
  if mat.class.method_defined?(:post_match)
    # mat is MatchData
    strmatched = mat[0]
    strpre     = mat.pre_match
  else
    strmatched = mat.to_str rescue raise_typeerror(mat, 'String')
  end

  # The matched string begins on the line that follows the pre-match
  # (2nd element) and ends on the last line of pre-match + match (1st element).
  first_matched = _ilines_consecutive(strpre, linebreak: linebreak)[1]
  last_matched  = _ilines_consecutive(strpre+strmatched, linebreak: linebreak)[0]

  { first_matched: first_matched, last_matched: last_matched }
end
943
+ private :_matched_line_indices
944
+
945
# Returns a pair of line indices related to the last line of the String.
#
# In the following order:
#
# 1. Index of the last line of the argument String (number of lines - 1) (or nil if the String is empty).
# 2. The 1st one plus 1 IF the String ends with a linebreak. Or, 0 if the String is empty. Otherwise the same as the 1st.
#
# @param str [String]
# @param linebreak: [String] +\n+ etc (Default: $/)
# @return [Array<Integer, nil>] nil if the value is invalid.
def _ilines_consecutive(str, linebreak: $/)
  return [nil, 0] if str.empty?

  n_breaks, ends_with_break = PlainText::Split.count_regexp(str, linebreak, with_if_end: true)

  # If the String ends with a linebreak, the number of lines equals the
  # number of linebreaks; otherwise there is one more (unterminated) line.
  last_idx = (ends_with_break ? n_breaks - 1 : n_breaks)
  next_idx = (ends_with_break ? last_idx + 1 : last_idx)
  last_idx = nil if last_idx < 0
  [last_idx, next_idx]
end
963
+ private :_ilines_consecutive
964
+
864
965
  # Returns MatchData of the String after the MatchData to the linebreak (inclusive)
865
966
  #
866
- # @param mat [MatchData, String]
867
- # @param strpost [String, nil] Post-match, if mat is String.
967
+ # @param mat [MatchData, String] If String, it is User's (last) matched String.
968
+ # @param strpost [String, nil] Post-match, if mat is String. After User's last match.
868
969
  # @param linebreak: [String] +\n+ etc (Default: $/)
869
- # @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that.
970
+ # @return [MatchData, nil] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak or if nothing follows the matched data) and m.post_match is all the lines after that. nil if the (non-empty) string after the matched data contains no linebreak.
870
971
  def post_match_in_line(mat, strpost=nil, linebreak: $/)
972
+ lb_quo = Regexp.quote linebreak
871
973
  if mat.class.method_defined? :post_match
872
974
  # mat is MatchData
873
975
  strmatched, strpost = mat[0], mat.post_match
874
976
  else
875
977
  strmatched = mat.to_str rescue raise_typeerror(mat, 'String')
876
978
  end
877
- lb_quo = Regexp.quote linebreak
878
- return /\A/.match if /#{lb_quo}\z/ =~ strmatched
979
+ return(/\A/.match strpost) if /#{lb_quo}\z/ =~ strmatched
980
+ return(/\A/.match strpost) if strpost.empty?
879
981
  /.*?#{lb_quo}/m.match strpost # non-greedy match and m option are required, as linebreak can be any characters.
880
982
  end
881
983
  private :post_match_in_line
882
984
 
883
985
  # tail command with Regexp
884
986
  #
987
+ # == Algorithm
988
+ #
989
+ # 1. Split the String with Regexp with {PlainText::Split#split_with_delimiter}
990
+ # 2. Find the last matched String.
991
+ # 3. Find the "line"-index-number of the matched String.
992
+ # 4. Adjust the line index number depending inclusive/exclusive
993
+ # 5. Add positive/negative padding number
994
+ # 6. pass it to {#head_inverse} (after Line-1).
995
+ #
885
996
  # @param re_in [Regexp] Regexp to determine the boundary.
886
997
  # @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
887
998
  # @param linebreak: [String] +\n+ etc (Default: $/).
888
999
  # @return [String] as self
889
1000
  # @see #tail
890
# (tail with Regexp; padding: is the number of lines added (positive) or
# removed (negative) at the head of the otherwise returned lines.)
def tail_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
  return self if empty?

  # "split" with the given Regexp pattern (NOT with linebreak!)
  # arst == (Array<String>) [PreMatch, UsersMatch1, Str, UsersMatch2,..., UsersMatchN [, PostMatch]]
  # If the user's match comes at the very end, the last element does not exist.
  arst = split_with_delimiter re_in  # PlainText::Split#split_with_delimiter (included in this module (hence maybe String))

  # UsersMatch basically failed - no match.
  return self[0,0] if arst.size <= 1

  arst.push "" if arst.size.even?
  # Now the last component is guaranteed to be not the delimiter (= String of User's match)
  #   arst == [PreMatch, UsersMatch1, ..., UsersMatchN, PostMatch(maybe-empty)]
  # (Either/both PreMatch and PostMatch can be empty).

  # The caller's linebreak (not the global $/) is used throughout.
  hslinenum = _matched_line_indices(arst[-2], arst[0..-3].join, linebreak: linebreak)

  # Note: hslinenum[] is for indices, whereas the number of lines is
  # required here to pass to head_inverse().
  nlines_remove_to = (inclusive ? hslinenum[:first_matched] : hslinenum[:last_matched]+1) - padding
  return self if nlines_remove_to <= 0  # everything

  head_inverse(nlines_remove_to, linebreak: linebreak)
end
901
1027
  private :tail_regexp
902
1028
 
903
-
904
1029
  # tail command based on the number of lines
905
1030
  #
906
1031
  # @param num_in [Integer] Original argument of the specified number of lines
@@ -910,7 +1035,7 @@ module PlainText
910
1035
  # @see #tail
911
1036
  def tail_linenum(num_in, num, linebreak: $/)
912
1037
  arret = split(linebreak, -1) # -1 is specified to preserve the last linebreak(s).
913
- return self.class.new("") if arret.empty?
1038
+ return self[0,0] if arret.empty?
914
1039
 
915
1040
  lb_quo = Regexp.quote linebreak
916
1041
  if num_in > 0
@@ -918,7 +1043,7 @@ module PlainText
918
1043
  num = 0 if num >= arret.size
919
1044
  end
920
1045
  ar = arret[(-num)..-1]
921
- (ar.nil? || ar.empty?) ? self.class.new("") : ar.join(linebreak)
1046
+ (ar.nil? || ar.empty?) ? self[0,0] : ar.join(linebreak)
922
1047
  end
923
1048
  private :tail_linenum
924
1049
 
data/plain_text.gemspec CHANGED
@@ -5,17 +5,17 @@ require 'date'
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
8
- s.version = "0.4"
8
+ s.version = "0.5".sub(/.*/){|c| fs = Dir.glob('changelog{,.*}', File::FNM_CASEFOLD); raise('More than one ChangeLog exist!') if fs.size > 1; warn("WARNING: Version(s.version=#{c}) already exists in #{fs[0]} - ok?") if fs.size == 1 && !IO.readlines(fs[0]).grep(/^\(Version: #{Regexp.quote c}\)$/).empty? ; c } # n.b., In macOS, changelog and ChangeLog are identical in default.
9
9
  # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
10
- %w(countchar textclean head.rb tail.rb).each do |f|
10
+ s.bindir = 'bin'
11
+ %w(countchar textclean head.rb tail.rb yard2md_afterclean).each do |f|
11
12
  path = s.bindir+'/'+f
12
13
  File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
13
14
  end
14
- s.bindir = 'bin'
15
15
  s.authors = ["Masa Sakano"]
16
- s.date = %q{2019-10-29}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
16
+ s.date = %q{2019-11-07}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
17
17
  s.summary = %q{Module to handle Plain-Text}
18
- s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
18
+ s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance. A few handy Ruby executable scripts to make use of them are included.}
19
19
  # s.email = %q{abc@example.com}
20
20
  s.extra_rdoc_files = [
21
21
  # "LICENSE",
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
34
34
  ret
35
35
  }
36
36
  s.files.reject! { |fn| File.symlink? fn }
37
+
37
38
  # s.add_runtime_dependency 'rails'
38
39
  # s.add_development_dependency "bourne", [">= 0"]
39
40
  s.homepage = %q{https://www.wisebabel.com}
@@ -47,5 +48,8 @@ Gem::Specification.new do |s|
47
48
  # s.rubygems_version = %q{1.3.5} # This is always set automatically!!
48
49
 
49
50
  s.metadata["yard.run"] = "yri" # use "yard" to build full HTML docs.
51
+ s.metadata["changelog_uri"] = "https://github.com/masasakano/#{s.name}/blob/master/ChangeLog"
52
+ s.metadata["source_code_uri"] = "https://github.com/masasakano/#{s.name}"
53
+ # s.metadata["documentation_uri"] = "https://www.example.info/gems/bestgemever/0.0.1"
50
54
  end
51
55