RubyGems - plain_text - Versions diffs - 0.4 → 0.5 - Mend

plain_text 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +4 -4
data/ChangeLog +28 -0
data/README.en.rdoc +52 -8
data/bin/head.rb +27 -13
data/bin/tail.rb +28 -12
data/bin/yard2md_afterclean +213 -0
data/lib/plain_text/parse_rule.rb +4 -1
data/lib/plain_text/part.rb +103 -0
data/lib/plain_text/split.rb +74 -0
data/lib/plain_text/util.rb +71 -10
data/lib/plain_text.rb +153 -28
data/plain_text.gemspec +9 -5
data/test/test_plain_text.rb +110 -1
data/test/test_plain_text_part.rb +80 -0
data/test/test_plain_text_split.rb +29 -0
data/test/test_plain_text_util.rb +36 -0
data/test/testhead_rb.rb +59 -4
data/test/testtail_rb.rb +58 -8
data/test/testyard2md_afterclean.rb +71 -0
metadata +11 -3

data/lib/plain_text/part.rb CHANGED Viewed

@@ -81,6 +81,7 @@ module PlainText
       even_num: 'even number of elements must be specified.',
       use_to_a: 'To handle it as an Array, use to_a first.',
     }
+    private_constant :ERR_MSGS
     # @param arin [Array] of [Paragraph1, Boundary1, Para2, Bd2, ...] or Part/Paragraph if boundaries is given
     # @param boundaries [Array] of Boundary
@@ -283,6 +284,108 @@ module PlainText
       map_part_core(with_index: false, **kwd, &bl)
     end
+    # merge parts/paragraphs if they satisfy the conditions.
+    #
+    # A group of two Parts/Paragraphs and the Boundaries in between and before and after
+    # is passed to the block consecutively.
+    #
+    # @yield [ary, b1, b2, i] Returns true if the two paragraphs should be merged.
+    # @yieldparam [Array] ary of [Para1st, BoundaryBetween, Para2nd]
+    # @yieldparam [Boundary] b1 Boundary-String before the first part/paragraph (nil for the first one)
+    # @yieldparam [Boundary] b2 Boundary-String after the second part/paragraph
+    # @yieldparam [Integer] i Index of the first Para
+    # @yieldreturn [Boolean, Symbol] True if they should be merged.  :abort to cancel the whole operation (nothing is merged and false is returned).
+    # @return [self, false] false if no pairs of parts/paragraphs are merged, else self.
+    def merge_para_if()
+      # Walks over every Paragraph index (except the last Paragraph), yields
+      # the pair starting there to the block and merges the accepted pairs.
+      arind2del = []  # Indices to delete (both paras and boundaries)
+      each_index do |ei|
+        break if ei >= size - 3  # 2nd last paragraph or later.
+        next if !index_para?(ei, skip_check: true)
+        # [Para(ei), Boundary(ei+1), Para(ei+2)], handed to the block as the 1st argument.
+        ar1st = self.to_a[ei..ei+2]
+        # Boundary preceding the first Paragraph; nil at the very beginning.
+        ar2nd = ((ei==0) ? nil : self[ei-1])
+        do_merge = yield(ar1st, ar2nd, self[ei+3], ei)
+        # :abort gives up immediately; nothing has been merged at this point.
+        return false                 if do_merge == :abort
+        arind2del.push ei, ei+1, ei+2 if do_merge
+      end
+      return false if arind2del.empty?
+      # Adjacent accepted pairs share an index (ei+2 of one is ei of the next).
+      arind2del.uniq!
+      # Processed from the tail, presumably so that the indices of the
+      # not-yet-merged (earlier) ranges stay valid while self shrinks.
+      (arind2ranges arind2del).reverse.each do |er|
+        merge_para!(er)
+      end
+      return self
+    end
+    # merge multiple paragraphs
+    #
+    # The boundaries between them are simply joined as String as they are.
+    #
+    # @overload set(index1, index2, *rest)
+    #   With a list of indices.  Unless use_para_index is true, this means the main Array index. Namely, if Part is [P0, B0, P1, B1, P2, B2, B3] and if you want to merge P1 and P2, you specify as (2,3,4) or (2,4).  If use_para_index is true, specify as (1,2).
+    #   @param index1 [Integer] the first index to merge
+    #   @param index2 [Integer] the second index to merge, and so on...
+    # @overload set(range)
+    #   With a range of the indices to merge. Unless use_para_index is true, this means the main Array index. See the first overload set about it.
+    #   @param range [Range] the range of the indices to merge
+    # @param use_para_index: [Boolean] If false (Default), the indices are for the main indices (alternative between Parts/Paragraphs and Boundaries, starting from Part/Paragraph). If true, the indices are as obtained with {#parts}, namely the array containing only Parts/Paragraphs.
+    # @return [self] self is returned even if nothing is merged (because no valid range of indices is selected).
+    def merge_para!(*rest, use_para_index: false)
+$myd = true
+#warn "DEBUG:m00: #{rest}\n"
+      (ranchk = build_index_range_for_merge_para!(*rest, use_para_index: use_para_index)) || (return self)  # Do nothing.
+      # ranchk is guaranteed to have a size of 2 or greater.
+#warn "DEBUG:m0: #{ranchk}\n"
+      self[ranchk] = [self[ranchk].to_a[0..-2].join, self[ranchk.end]]  # 2-elements (Para, Boundary)
+      self
+    end
+    # Building a proper array for the indices to merge
+    #
+    # Returns always an even number of Range, starting from para,
+    # like (2..5), the last of which is a Boundary which is not merged.
+    # In this example, Para(i=2)/Boundary(3)/Para(4) is merged,
+    # while Boundary(5) stays as it is.
+    #
+    # @param (see #merge_para!)
+    # @param use_para_index: [Boolean] false
+    # @return [Range, nil] nil if no range is selected.
+    def build_index_range_for_merge_para!(*rest, use_para_index: false)
+#warn "DEBUG:b0: #{rest.inspect} to_a=#{to_a}\n"
+      inary = rest.flatten
+      return nil if inary.empty?
+      # inary = inary[0] if like_range?(inary[0])
+#warn "DEBUG:b1: #{inary.inspect}\n"
+      (ary = to_ary_positive_index(inary, to_a)) || return  # Guaranteed to be an array of positive indices (sorted and uniq-ed).
+#warn "DEBUG:b3: #{ary}\n"
+      return nil if ary.empty?
+      # Normalize so the array contains both Paragraph and Boundaries in between.
+      # After this, ary must be [Para1-index, Boundary1-index, P2, B2, ..., Pn-index, Bn-index]
+      # Note: In the input, the index is probably for Paragraph.  But,
+      #   for the sake of later processing, make the array contain an even number
+      #   of elements, ending with Boundary.
+      if use_para_index
+        ary = ary.map{|i| [i*2, i*2+1]}.flatten
+      elsif index_para?(ary[-1], skip_check: true)
+        # The last index in the given Array or Range was for Paragraph (Likely case).
+        ary.push(ary[-1]+1)
+      end
+      # Exception if they are not consecutive.
+      ary.inject{|memo, val| (val == memo+1) ? val : raise(ArgumentError, "Given (Paragraph) indices are not consecutive.")}
+$myd = false
+      # Exception if the first index is for Boundary and no Paragraph.
+      raise ArgumentError, "The first index (#{ary[0]}) is not for Paragraph." if !index_para?(ary[0], skip_check: true)
+      i_end = [ary[-1], size-1].min
+      return if i_end - ary[0] < 3  # No more than 1 para selected.
+      (ary[0]..ary[-1])
+    end
+    private :build_index_range_for_merge_para!
     # Normalize the content, making sure it has an even number of elements
     #
     # The even and odd number elements are, if bare Strings or Array, converted into

data/lib/plain_text/split.rb CHANGED Viewed

@@ -46,6 +46,42 @@ module PlainText
       adjust_last_element(arret) # => Array
     end
+    # The class-method version of the instance method of the same name.
+    #
+    # One more parameter (input String) is required to specify.
+    #
+    # @param instr [String] String that is examined.
+    # @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
+    # @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
+    # @param with_if_end: [Boolean] a special case (see the description).
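+    # @return [Integer, Array<Integer, Boolean>] non-negative count (0 for an empty String); Array of [count, Boolean] if with_if_end is true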
+    # @see PlainText::Split#count_regexp
+    def self.count_regexp(instr, re_in, like_linenum: false, with_if_end: false)
+      # with_if_end needs the line-number-like count to compare with.
+      like_linenum = true if with_if_end
+      # An empty String has no matches and is treated as ending with a match.
+      return (with_if_end ? [0, true] : 0) if instr.empty?
+      # NOTE(review): assumes split_with_delimiter returns
+      # [Str, Match, Str, Match, ...] with no trailing Str when the String
+      # ends with a match - confirm against its definition.
+      allsize = split_with_delimiter(instr, re_in).size
+      n_normal = allsize.div(2)  # number of matches
+      return n_normal if !like_linenum
+      # Rounded up: an odd size means there is a remainder after the last match.
+      n_lines = (allsize.even? ? allsize : allsize+1).div 2
+      # The two counts agree only if nothing follows the last match.
+      with_if_end ? [n_normal, (n_normal ==  n_lines)] : n_lines
+    end
+    # The class-method version of the instance method of the same name.
+    #
+    # One more parameter (input String) is required to specify.
+    #
+    # @param instr [String] String that is examined.
+    # @param linebreak: [String] +\n+ etc (Default: $/).
+    # @return [Integer] non-negative (0 for an empty String)
+    # @see #count_lines
+    def self.count_lines(instr, linebreak: $/)
+      return 0 if instr.empty?
+      ar = instr.split(linebreak, -1)  # -1 is specified to preserve the last linebreak(s).
+      # A String ending with the linebreak yields a trailing empty element,
+      # which is not counted as a line of its own.
+      ar.pop if "" == ar[-1]
+      ar.size
+    end
     ####################################################
     # Class methods (Private)
     ####################################################
@@ -93,6 +129,44 @@ module PlainText
     def split_with_delimiter(*rest)
       PlainText::Split.public_send(__method__, self, *rest)
     end
+    # Count the number of matches to self that satisfy the given Regexp
+    #
+    # If like_linenum option is specified, it is counted like the number of
+    # lines, namely the returned value is incremented from the number of
+    # matches by 1 unless the very last characters of the String is
+    # the last match.
+    # For example, if no matches are found, this still returns one.
+    #
+    # Note if the String (self) is empty, this always returns 0.
+    #
+    # The special option is +with_if_end+.  If given true,
+    # this returns Array<Integer, Boolean> instead of a simple Integer,
+    # with the first parameter being the Integer of the count as with
+    # the default like_linenum=false, and the second parameter gives
+    # true if the number is the same even if it was like_linenum=true,
+    # namely if the end of the String coincides with the last match,
+    # else false.
+    # (This parameter is introduced just to reduce the overhead of
+    # potentially calling this routine twice or user's making their own check.)
+    #
+    # @param re_in [Regexp, String] If String, it is interpreted literally as in String#split.
+    # @param like_linenum: [Boolean] if true (Def: false), it counts like the line number.
+    # @param with_if_end: [Boolean] a special case (see the description).
+    # @return [Integer, Array<Integer, Boolean>] non-negative (0 for an empty String)
+    # @see PlainText::Split.count_regexp
+    def count_regexp(*rest, **kwd)
+      # Delegates to the module-level method of the same name, passing self as the String to examine.
+      PlainText::Split.public_send(__method__, self, *rest, **kwd)
+    end
+    # Returns the number of lines.
+    #
+    # @param linebreak: [String] +\n+ etc (Default: $/).
+    # @return [Integer] non-negative (0 for an empty String)
+    # @see PlainText::Split.count_lines
+    def count_lines(**kwd)
+      # Delegates to the module-level method of the same name, passing self as the String to examine.
+      PlainText::Split.public_send(__method__, self, **kwd)
+    end
   end # module Split
 end # module PlainText

data/lib/plain_text/util.rb CHANGED Viewed

@@ -11,6 +11,32 @@ module PlainText
     # All methods in this Module are module functions.
     module_function
+    private
+    # Make the Array of Ranges from an Array of positive Integers
+    #
+    # @example
+    #    arind2ranges [1,2,3,6,7,9]
+    #    # => [(1..3), (6..7), (9..9)]
+    #
+    # @param arin [Array<Integer>]
+    # @return [Array<Range>]
+    def arind2ranges(arin)
+      arout = []
+      # curi/curf: first and last indices of the run being built.
+      # Raises RuntimeError if arin has no first element.
+      (curi = curf = arin[0]) || raise("arin should not be empty.")
+      arin.each do |i|
+        if i > curf + 1
+          # A gap: close the current run and start a new one at i.
+          arout.push(curi..curf)
+          curi = curf = i
+        else
+          # Consecutive (or not larger than the current end): the run ends at i.
+          curf = i
+        end
+      end
+      # Close the last run.
+      arout.push(curi..curf)
+      arout
+    end
+    private :arind2ranges
     # Returns a pair of Arrays of even and odd number-indices of the original Array
     #
     # @example
@@ -36,14 +62,15 @@ module PlainText
     # If the negative index is out of range, it returns nil.
     #
     # @param i [Integer]
-    # @param ary [Array] Reference Array.
+    # @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
     # @return [Integer, NilClass] nil if out of range to the negative.  Note in most cases in Ruby default, it raises IndexError.  See the code of {#positive_array_index_checked}
     # @raise [TypeError] if non-integer is specified.
-    # @raise [ArgumentError] if ary is not an Array, or more specifically, it does not have size method or ary.size does not return Integer or similar.
-    def positive_array_index(i, ary)
-      i2 = i.to_int rescue (raise TypeError, sprintf("no implicit conversion of #{i.class} into Integer"))
+    # @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
+    def positive_array_index(i, ary=nil)
+      arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
+      i2 = i.to_int rescue (raise TypeError, sprintf("no implicit conversion of #{i.class} into Integer (i=#{i.inspect},ary=#{ary.inspect})"))
       return i2 if i2 >= 0
-      ret = ary.size + i2 rescue (raise ArgumentError, "argument is not an array.")
+      ret = arysize + i2 rescue (raise ArgumentError, "Reference is neither an Array nor Integer.")
       (ret < 0) ? nil : ret
     end
@@ -55,22 +82,24 @@ module PlainText
     # Wrapper for {#positive_array_index}
     #
     # @param index_in [Integer] Index to check and convert from. Potentially negative integer.
-    # @param ary [Array] Reference Array.
+    # @param ary [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size (untested)).
     # @param accept_too_big: [Boolean, NilClass] if true (Default), a positive index larger than the last array index is returned as it is. If nil, the last index + 1 is accepted but raises an Exception for anything larger.  If false, any index larger than the last index raises an Exception.
     # @param varname: [NilClass, String] Name of the variable (or nil) to be used for error messages.
     # @return [Integer] Non-negative index; i.e., if index=-1 is specified for an Array with a size of 3, the returned value is 2 (the last index of it).
     # @raise [IndexError] if the index is out of the range to negative.
-    def positive_array_index_checked(index_in, ary, accept_too_big: true, varname: nil)
+    # @raise [ArgumentError] if ary is neither an Array nor Integer, or more specifically, it does not have size method or ary.size does not return Integer or similar.
+    def positive_array_index_checked(index_in, ary=nil, accept_too_big: true, varname: nil)
       # def self.positive_valid_index_for_array(index_in, ary, varname: nil)
       errmsgs = {}
       %w(of for).each do |i|
         errmsgs[i] = (varname ? "." : sprintf(" %s %s.", i, varname))
       end
-      index = positive_array_index(index_in, ary)  # guaranteed to be Integer or nil
-      raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, ary.size) if !index  # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).
+      arysize = (ary ? (ary.respond_to?(:to_int) ? ary.to_int : ary.size) : size)
+      index = positive_array_index(index_in, arysize)  # guaranteed to be Integer or nil (or ArgumentError)
+      raise IndexError, sprintf("index (%s) too small for array; minimum: -%d", index_in, arysize) if !index  # Ruby default Error message (except the variable "index" as opposed to "index_in is used in the true Ruby default).
       if index_in >= 0
-        last_index = ary.size - 1
+        last_index = arysize - 1
         errnote1 = nil
         if    (index >  last_index + 1) && !accept_too_big
           errnote1 = ' (or +1)'
@@ -82,6 +111,38 @@ module PlainText
       index
     end
+    # Converts Array or Range to an Array with positive indices.
+    #
+    # @param from [Array, Range]
+    # @param arref [Array, Integer, nil] Reference Array or its size (Array#size) or nil (interpreted as self#size).
+    # @param flatten: [Boolean] If true (Default), if elements are Range, they are unfolded.  If false and if an Array containing a Range, Exception is raised.
+    # @param sortuniq: [Boolean] If true (Default), the return is sorted and uniq-ed.
+    # @return [Array, nil] nil if arref is empty or if out of range to the negative.  Note in most cases in Ruby default, it raises IndexError.  See the code of {#positive_array_index_checked}
+    # @raise [TypeError] if non-integer is specified.
+    # @raise [ArgumentError] if arref is neither an Array nor Integer, or more specifically, it does not have size method or arref.size does not return Integer or similar.
+    def to_ary_positive_index(from, arref, flatten: true, sortuniq: true, **kwd)
+      arrefsize = (arref ? (arref.respond_to?(:to_int) ? arref.to_int : arref.size) : size)
+      return nil if arrefsize < 1
+      if flatten
+        from = [from].flatten.map{|ec| like_range?(ec) ? send(__method__, ec, arrefsize, flatten: false, sortuniq: sortuniq, **kwd) : ec }.flatten
+      end
+      if like_range?(from)
+        i_beg = positive_array_index_checked(from.begin, arrefsize, **kwd)
+        n = from.end
+        i_end = ((n.nil? || n == Float::INFINITY) ? arrefsize-1 : positive_array_index_checked(n, arrefsize, **kwd))
+        return (from.exclude_end? ? (i_beg...i_end) : (i_beg..i_end)).to_a
+      end
+      ret = from.map{|i| positive_array_index_checked(i, arrefsize, **kwd)}
+      (sortuniq ? ret.sort.uniq : ret)
+    end
+    # Returns true if obj is like Range (duck-typing).
+    #
+    # @param obj [Object]
+    def like_range?(obj)
+      # exclude_end? is the method used here to tell a Range-like object.
+      obj.respond_to? :exclude_end?
+    end
     # Raise TypeError
     #
     # Call as +raise_typeerror(var_name)+ from instance methods,

data/lib/plain_text.rb CHANGED Viewed

@@ -591,7 +591,7 @@ module PlainText
   # @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
   # @param linebreak: [String] +\n+ etc (Default: +$/+), used when +unit==:line+ (Default)
   # @return [String] as self
-  def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
+  def head(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
     if num_in.class.method_defined? :to_int
       num = num_in.to_int
       raise ArgumentError, "Non-positive num (#{num_in}) is given in #{__method__}" if num.to_int < 1
@@ -604,10 +604,10 @@ module PlainText
     case unit
     when :line, "-n"
       # Regexp (for boundary)
-      return head_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
+      return head_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
       # Integer (a number of lines)
-      ret = split(linebreak)[0..(num-1)].join(linebreak)
+      ret = split(linebreak, -1)[0..(num-1)].join(linebreak)  # -1 is specified to preserve the last linebreak(s).
       return ret if size <= ret.size  # Specified line is larger than the original or the last NL is missing.
       return(ret << linebreak)  # NL is added to the tail as in the original.
     when :char
@@ -634,7 +634,7 @@ module PlainText
   # @return same as self
   def head_inverse(*rest, **key)
     s2 = head(*rest, **key)
-    (s2.size >= size) ? '' : self[s2.size..-1]
+    (s2.size >= size) ? self[0,0] : self[s2.size..-1]
   end
   # Normalizes line-breaks
@@ -779,7 +779,8 @@ module PlainText
   # @param inclusive: [Boolean] read only when unit is :line. If inclusive (Default), the (entire) line that matches is included in the result.
   # @param linebreak: [String] +\n+ etc (Default: +$/+), used when unit==:line (Default)
   # @return [String] as self
-  def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, linebreak: $/)
+  def tail(num_in=DEF_HEADTAIL_N_LINES, unit: :line, inclusive: true, padding: 0, linebreak: $/)
     if num_in.class.method_defined? :to_int
       num = num_in.to_int
       raise ArgumentError, "num of zero is given in #{__method__}" if num == 0
@@ -793,7 +794,7 @@ module PlainText
     case unit
     when :line, '-n'
       # Regexp (for boundary)
-      return tail_regexp(re_in, inclusive: inclusive, linebreak: linebreak) if re_in
+      return tail_regexp(re_in, inclusive: inclusive, padding: padding, linebreak: linebreak) if re_in
       # Integer (a number of lines)
       return tail_linenum(num_in, num, linebreak: linebreak)
@@ -822,7 +823,7 @@ module PlainText
   # @return same as self
   def tail_inverse(*rest, **key)
     s2 = tail(*rest, **key)
-    (s2.size >= size) ? '' : self[0..(size-s2.size-1)]
+    (s2.size >= size) ? self[0,0] : self[0..(size-s2.size-1)]
   end
@@ -832,25 +833,71 @@ module PlainText
   # head command with Regexp
   #
+  # @todo Improve the algorithm like {#tail_regexp}
+  #
   # @param re_in [Regexp] Regexp to determine the boundary.
   # @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
+  # @param padding: [Integer] Number of lines (positive or negative) to add to the lines returned.
   # @param linebreak: [String] +\n+ etc (Default: $/).
   # @return [String] as self
   # @see #head
-  def head_regexp(re_in, inclusive: true, linebreak: $/)
+  def head_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
+    return self if empty?
     mat = re_in.match self
     return self if !mat
     if inclusive
-      return mat.pre_match+mat[0]+post_match_in_line(mat, linebreak: linebreak)[0]
+      postmat = post_match_in_line(mat, linebreak: linebreak)
+      strmain = mat.pre_match+mat[0]+(postmat ? postmat[0] : "")
     else
-      return pre_match_in_line(mat.pre_match, linebreak: linebreak).pre_match
+      strmain = pre_match_in_line(mat.pre_match, linebreak: linebreak).pre_match
     end
+    return strmain if padding == 0
+    ## Adds padding
+    lines_main_deli, nlines_main = split_lines_with_nlines(strmain, linebreak: linebreak)
+    n_lines2ret = nlines_main + padding
+    return self[0,0] if n_lines2ret <= 0  # padding is too negatively large and hence no lines left.
+      # [0,0] instead of "" is used to preserve its class (in case it is a child class of String).
+    return lines_main_deli[0..(n_lines2ret*2-1)].join if padding < 0
+    # positive padding
+    lines_self_deli, nlines_self = split_lines_with_nlines(linebreak: linebreak)
+    return self if n_lines2ret > nlines_self
+    return lines_self_deli[0..(n_lines2ret*2-1)].join
   end
   private :head_regexp
+  # Returns Array of [Line,NL,Line,NL, ...] and number of lines
+  #
+  # Wrapper of {PlainText::Split::split_with_delimiter}(str, linebreak)
+  #
+  # See {PlainText::Split::count_lines} (and {PlainText::Split#count_lines})
+  #
+  # @param str [String]
+  # @return [Array<Array, Integer>]
+  def split_lines_with_nlines(str=self, linebreak: $/)
+    arline = PlainText::Split::split_with_delimiter(str, linebreak)
+    nlines = arline.size
+    nlines += 1 if nlines.odd?  # There is no newline at the tail.
+    # This is the NUMBER of the lines (NOT index)
+    nlines = nlines.quo 2  # "/" MUST be fine...
+    [arline, nlines]
+  end
+  private :split_lines_with_nlines
   # Returns MatchData of the String at and before the first linebreak before the MatchData (inclusive)
   #
+  # Basically this returns String after the last linebreak of the input
+  #
+  # @example
+  #   pre_match_in_line("1\n2\n__abc")  # => #<MatchData "__abc"> pre_match=="1\n2\n"
+  #   pre_match_in_line("1\n2\n")       # => #<MatchData ""     > pre_match=="1\n2\n"
+  #   pre_match_in_line(      "__abc")  # => #<MatchData "__abc"> pre_match=="     "
+  #
   # @param strpre [String] String of prematch of the last MatchData
   # @param linebreak: [String] +\n+ etc (Default: $/)
   # @return [MatchData] m[0] is the string after the last linebreak before the matched data (exclusive) and m.pre_match is all the lines before that.
@@ -861,46 +908,124 @@ module PlainText
   end
   private :pre_match_in_line
+  # Get the line index numbers of the first and last lines of the matched string
+  #
+  # It is the Ruby index number as used in the each_line method.
+  #
+  # Matched String can span for multi-lines.
+  #
+  # Note if matched string is empty, it still is treated as significant.
+  #
+  # @param mat [MatchData, String] If String, it is User's (last) matched String.
+  # @param strpre [String, nil] Pre-match from the beginning of self to the matched string, if mat is String.
+  # @param linebreak: [String] +\n+ etc (Default: $/)
+  # @return [Hash<Integer, nil>] 4 keys: :last_prematch, :first_matched, :last_matched, :first_post_match
+  def _matched_line_indices(mat, strpre=nil, linebreak: $/)
+    if mat.class.method_defined? :post_match
+      # mat is MatchData
+      # (the given strpre, if any, is overwritten with its pre_match).
+      strmatched, strpre = mat[0], mat.pre_match
+    else
+      # mat is String-like; strpre must then be given by the caller
+      # (it is concatenated with the matched String below).
+      strmatched = mat.to_str  rescue raise_typeerror(mat, 'String')
+    end
+    # Only the two keys below are set in the current implementation.
+    hsret = {
+      # last_prematch: nil,
+      first_matched: nil,
+      last_matched: nil,
+      # first_post_match: nil
+    }
+    # Line index where the match begins: the 2nd value for the pre-match.
+    _, hsret[:first_matched] = _ilines_consecutive(strpre, linebreak: linebreak)
+    # Line index where the match ends: the 1st value for pre-match + match.
+    hsret[:last_matched], _  = _ilines_consecutive(strpre+strmatched, linebreak: linebreak)
+    hsret
+  end
+  private :_matched_line_indices
+  # Returns a pair of line indices for the given String.
+  #
+  # In the following order:
+  #
+  # 1. Index of the last line of the argument String (number of lines - 1) (or nil if it becomes negative).
+  # 2. the 1st one plus 1 IF the last line ends with a linebreak. Or, 0 if the first one is empty. Otherwise the same as the 1st.
+  #
+  # @return [Array<Integer, nil>] nil if the value is invalid.
+  def _ilines_consecutive(str, linebreak: $/)
+    # Empty String: there is no last line, and the next line is the very first one.
+    return [nil, 0] if str.empty?
+    # Only if first ends with a linebreak, increment by 1 for the second.
+    # nmatched: number of linebreaks; with_linened: true if str ends with a linebreak.
+    nmatched, with_linened = PlainText::Split.count_regexp(str, linebreak, with_if_end: true)
+    # Index of the last line (= number of lines - 1).
+    first  = nmatched + (with_linened ? 0 : 1) - 1
+    # Index of the line where the text following str would start.
+    second = first    + (with_linened ? 1 : 0)
+    first  = nil if first < 0
+    [first, second]
+  end
+  private :_ilines_consecutive
   # Returns MatchData of the String after the MatchData to the linebreak (inclusive)
   #
-  # @param mat [MatchData, String]
-  # @param strpost [String, nil] Post-match, if mat is String.
+  # @param mat [MatchData, String] If String, it is User's (last) matched String.
+  # @param strpost [String, nil] Post-match, if mat is String.  After User's last match.
   # @param linebreak: [String] +\n+ etc (Default: $/)
-  # @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that.
+  # @return [MatchData] m[0] is the string after matched data and up to the next first linebreak (inclusive) (or empty string if the last character(s) of matched data is the linebreak) and m.post_match is all the lines after that.  (maybe nil?? not sure...)
   def post_match_in_line(mat, strpost=nil, linebreak: $/)
+    lb_quo = Regexp.quote linebreak
     if mat.class.method_defined? :post_match
       # mat is MatchData
       strmatched, strpost = mat[0], mat.post_match
     else
       strmatched = mat.to_str rescue raise_typeerror(mat, 'String')
     end
-    lb_quo = Regexp.quote linebreak
-    return /\A/.match if /#{lb_quo}\z/ =~ strmatched
+    return(/\A/.match strpost) if /#{lb_quo}\z/ =~ strmatched
+    return(/\A/.match strpost) if strpost.empty?
     /.*?#{lb_quo}/m.match strpost  # non-greedy match and m option are required, as linebreak can be any characters.
   end
   private :post_match_in_line
   # tail command with Regexp
   #
+  # == Algorithm
+  #
+  # 1. Split the String with Regexp with {PlainText::Split#split_with_delimiter}
+  # 2. Find the last matched String.
+  # 3. Find the "line"-index-number of the matched String.
+  # 4. Adjust the line index number depending inclusive/exclusive
+  # 5. Add positive/negative padding number
+  # 6. pass it to {#head_inverse} (after Line-1).
+  #
   # @param re_in [Regexp] Regexp to determine the boundary.
   # @param inclusive: [Boolean] If true (Default), the (entire) line that matches re_in is included in the result. Else the entire line is excluded.
   # @param linebreak: [String] +\n+ etc (Default: $/).
   # @return [String] as self
   # @see #tail
-  def tail_regexp(re_in, inclusive: true, linebreak: $/)
-    arst = split_with_delimiter re_in  # PlainText::Split#split_with_delimiter (included in String)
-    return self.class.new("") if 0 == arst.size  # Maybe self is a sub-class of String.
-    if inclusive
-      return pre_match_in_line( arst[0..-3].join, linebreak: linebreak)[0] + arst[-2] + arst[-1]
-      # Note: Even if (arst.size < 3), arst[0..-3] returns [].
-    else
-      return post_match_in_line(arst[-2], arst[-1], linebreak: linebreak).post_match
-    end
+  def tail_regexp(re_in, inclusive: true, padding: 0, linebreak: $/)
+    return self if empty?
+    # "split" with the given Regexp pattern (NOT with linebreak!)
+    #   arst == (Array<String>) [PreMatch, UsersMatch1, Str, UsersMatch2,..., UsersMatchN [, PostMatch]]
+    #   If the user's match comes at the very end, the last element does not exist.
+    arst = split_with_delimiter re_in  # PlainText::Split#split_with_delimiter (included in this module (hence maybe String))
+    # UsersMatch basically failed - no match.
+    return self[0,0] if arst.size <= 1
+    arst.push "" if arst.size.even?
+    # Now the last component is guarantee to be not the delimiter (= String of User's match)
+    #   arst == [PreMatch, UsersMatch1, ..., UsersMatchN, PostMatch(maybe-empty)]
+    # Minimum:
+    #   arst == [PreMatch, UsersMatch1, PostMatch]  (<= maybe much more PreMatch-es)
+    #   (Either/both PreMatch and PostMatch can be empty).
+    hslinenum = _matched_line_indices(arst[-2], arst[0..-3].join, linebreak: $/)
+    # Note: hslinenum[] is for indices, whereas the number of lines is
+    # required here to pass to head_inverse().
+    nlines_remove_to = (inclusive ? hslinenum[:first_matched] : hslinenum[:last_matched]+1) - padding
+    return self if nlines_remove_to <= 0  # everything
+    return head_inverse(nlines_remove_to)
   end
   private :tail_regexp
   # tail command based on the number of lines
   #
   # @param num_in [Integer] Original argument of the specified number of lines
@@ -910,7 +1035,7 @@ module PlainText
   # @see #tail
   def tail_linenum(num_in, num, linebreak: $/)
     arret = split(linebreak, -1)  # -1 is specified to preserve the last linebreak(s).
-    return self.class.new("") if arret.empty?
+    return self[0,0] if arret.empty?
     lb_quo = Regexp.quote linebreak
     if num_in > 0
@@ -918,7 +1043,7 @@ module PlainText
       num = 0  if num >= arret.size
     end
     ar = arret[(-num)..-1]
-    (ar.nil? || ar.empty?) ? self.class.new("") : ar.join(linebreak)
+    (ar.nil? || ar.empty?) ? self[0,0] : ar.join(linebreak)
   end
   private :tail_linenum

data/plain_text.gemspec CHANGED Viewed

@@ -5,17 +5,17 @@ require 'date'
 Gem::Specification.new do |s|
   s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
-  s.version = "0.4"
+  s.version = "0.5".sub(/.*/){|c| fs = Dir.glob('changelog{,.*}', File::FNM_CASEFOLD); raise('More than one ChangeLog exist!') if fs.size > 1; warn("WARNING: Version(s.version=#{c}) already exists in #{fs[0]} - ok?") if fs.size == 1 && !IO.readlines(fs[0]).grep(/^\(Version: #{Regexp.quote c}\)$/).empty? ; c }  # n.b., In macOS, changelog and ChangeLog are identical in default.
   # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
-  %w(countchar textclean head.rb tail.rb).each do |f|
+  s.bindir = 'bin'
+  %w(countchar textclean head.rb tail.rb yard2md_afterclean).each do |f|
     path = s.bindir+'/'+f
     File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
   end
-  s.bindir = 'bin'
   s.authors = ["Masa Sakano"]
-  s.date = %q{2019-10-29}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
+  s.date = %q{2019-11-07}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
   s.summary = %q{Module to handle Plain-Text}
-  s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
+  s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance. A few handy Ruby executable scripts to make use of them are included.}
   # s.email = %q{abc@example.com}
   s.extra_rdoc_files = [
     # "LICENSE",
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
     ret
   }
   s.files.reject! { |fn| File.symlink? fn }
   # s.add_runtime_dependency 'rails'
   # s.add_development_dependency "bourne", [">= 0"]
   s.homepage = %q{https://www.wisebabel.com}
@@ -47,5 +48,8 @@ Gem::Specification.new do |s|
   # s.rubygems_version = %q{1.3.5}      # This is always set automatically!!
   s.metadata["yard.run"] = "yri" # use "yard" to build full HTML docs.
+  s.metadata["changelog_uri"]     = "https://github.com/masasakano/#{s.name}/blob/master/ChangeLog"
+  s.metadata["source_code_uri"]   = "https://github.com/masasakano/#{s.name}"
+  # s.metadata["documentation_uri"] = "https://www.example.info/gems/bestgemever/0.0.1"
 end