RubyGems - plain_text - Versions diffs - 0.1 → 0.2 - Mend

plain_text 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml +4 -4
data/ChangeLog +20 -0
data/LICENSE.txt +20 -0
data/README.en.rdoc +1 -0
data/lib/plain_text/parse_rule.rb +15 -1
data/lib/plain_text.rb +103 -40
data/plain_text.gemspec +6 -4
data/test/test_plain_text.rb +25 -15
metadata +4 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 52d4c007bf2d127b5fed9c1edd5df87597ee5ca4689818244ecb05bcb6d0a8f1
-  data.tar.gz: 5af8e4489d714e272c8304911cbfdac18a736a49abf35d5143fd86ddf1d7e917
+  metadata.gz: 2c015ed947812371558456375c2933f0d03720082899f8c58c699419eda77f1b
+  data.tar.gz: fafc479d9bb492bd3b3ad140ec7a58d2cc0e7bc49dec4ad80c4111ad1f63e3df
 SHA512:
-  metadata.gz: 422f9de75686466409ce8d9c819226313974d6d308aa66dbede7b808aed8230b5efb3bbf838151d00031a9157e9109c936e3af3bc2dc51e447bce9fee2bfd81d
-  data.tar.gz: af8ca2904b51fb3ea3b822a49ae3e650ef5112af8f63d4b1df51ade1b31a0d083f712bf2b4f64f5eba17624c11264e6b7ffc8de8ce866541ef8d47a160be299c
+  metadata.gz: cb7d054e24cc85c64bbb556d4de30b3b54c9b51b409519d9b7f307fbe64dc05dc32e6e7cbeccc027b41c842a31ec5b489e60801b1c1c1f72e587157f62f38391
+  data.tar.gz: aef2b0ebd0c69f694c438cbf8d8e62d6d754d92c5d804553649c681d6c088bd9bb363197d9fb209b184aa49fb44ef5e733268e1d53a19bc7dfef260c86dee88c

data/ChangeLog CHANGED Viewed

@@ -1,3 +1,23 @@
+-----
+(Version: 0.2)
+2019-10-27  Masa Sakano
+  * PlainText.clean_text
+    * Option name and default changed from `firstsps_style=:truncate` to `firstlbs_style=:delete`
+    * Default of Option `linehead_style` changed from :delete to :none
+    * Option `sps_style` now ignores the line head and tail in a new private class method `clean_text_sps!`
+    * Fixed bugs, including the one for Option choice `linetail_style: :markdown`
+  * New constant ParseRule::RuleEachLineStrip
+-----
+(Version: 0.2)
+2019-10-27  Masa Sakano
+  * PlainText.clean_text
+    * Option name and default changed from `firstsps_style=:truncate` to `firstlbs_style=:delete`
+    * Default of Option `linehead_style` changed from :delete to :none
+    * Option `sps_style` now ignores the line head and tail in a new private class method `clean_text_sps!`
+    * Fixed bugs, including the one for Option choice `linetail_style: :markdown`
+  * New constant ParseRule::RuleEachLineStrip
 -----
 (Version: 0.1)
 2019-10-25  Masa Sakano

data/LICENSE.txt ADDED Viewed

@@ -0,0 +1,20 @@
+Copyright (c) 2012-2018 Scott Chacon and others
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.en.rdoc CHANGED Viewed

@@ -169,4 +169,5 @@ None.
 Author::  Masa Sakano < info a_t wisebabel dot com >
 Versions:: The versions of this package follow Semantic Versioning (2.0.0) http://semver.org/
+License:: MIT

data/lib/plain_text/parse_rule.rb CHANGED Viewed

@@ -81,6 +81,10 @@ module PlainText
   #   pt1.parts[0].parts[1] # => Paragraph::Title("Breaking!")
   #   pt1.boundaries[1]     # => Boundary("\n======\n")
   #
+  # @todo
+  #   It would be smarter each instance (Regexp and Part) has its own "name"
+  #   rather than this class holds @names as an Array.
+  #
   # @author Masa Sakano (Wise Babel Ltd)
   #
   class ParseRule
@@ -462,13 +466,23 @@ module PlainText
     def_lb_q = PlainText::DefLineBreaks.map{|i| Regexp.quote i}.join '|'
-    # {ParseRule} instance to
+    # {ParseRule} instance to
     # split a String with 2 or more linebreaks (with potentially white-spaces in between).
     # This instance can be dup-ped and used normally. However, if it is clone-d, the cloned instance would be unmodifiable.
     RuleConsecutiveLbs = self.new(/((?:#{def_lb_q})(?:#{def_lb_q}|[[:blank:]])*(?:#{def_lb_q}))/, name: 'ConsecutiveLbs') # => /((?:\r\n|\n|\r){2,}/
     RuleConsecutiveLbs.freeze
     RuleConsecutiveLbs.rules.freeze
     RuleConsecutiveLbs.names.freeze
+    # {ParseRule} instance to
+    # split a String with 1 linebreak that is potentially sandwiched with white-spaces
+    # (or a whitespace(s) at the very beginning or end).
+    # Essentially, each line (after Ruby-strip-ped) is treated as Paragraph.
+    # This instance can be dup-ped and used normally. However, if it is clone-d, the cloned instance would be unmodifiable.
+    RuleEachLineStrip = self.new(/(\A[[:space:]]+|[[:space:]]*\n[[:space:]]*|[[:space:]]+\z)/, name: 'EachLineStrip') # => matches leading spaces, a linebreak with surrounding spaces, or trailing spaces
+    RuleEachLineStrip.freeze
+    RuleEachLineStrip.rules.freeze
+    RuleEachLineStrip.names.freeze
   end # class ParseRule
 end # module PlainText

data/lib/plain_text.rb CHANGED Viewed

@@ -55,6 +55,7 @@ module PlainText
   # @return [Integer]
   def self.count_char(instr, *rest,
                  lbs_style: :delete,
+                 linehead_style: :delete,
                  lastsps_style: :delete,
                  lb_out: "\n",
                  **k)
@@ -72,9 +73,9 @@ module PlainText
   # * Blank lines are truncated into one line with no white spaces: +boundary_style=lb_out*2(=$/*2)+
   # * Consecutive white spaces are truncated into a single space: +sps_style=:truncate+
   # * White spaces before or after a CJK character is deleted: +delete_asian_space=true+
-  # * Preceding white spaces in each line are deleted: +linehead_style=:delete+
+  # * Preceding white spaces in each line are preserved: +linehead_style=:none+
   # * Trailing white spaces in each line are deleted: +linetail_style=:delete+
-  # * Preceding line-breaks and white spaces at the beginning of the entire input string are truncated into one space: +firstsps_style=:truncate+
+  # * Line-breaks at the beginning of the entire input string are deleted: +firstlbs_style=:delete+
   # * Trailing white spaces and line-breaks at the end of the entire input string are truncated into a single linebreak: +lastsps_style=:truncate+
   #
   # For a String with predominantly CJK characters, the following setting is recommended:
@@ -85,19 +86,25 @@ module PlainText
   # Note for the Symbols in optional arguments, the Symbol with the first character only is accepted,
   # e.g., +:d+ instead of +:delete+ (nb., +:t2+ for +:truncate2+).
   #
-  # For more detail, see the description.
+  # For more detail, see the descriptions of the individual options below.
+  #
+  # Note that for the case of traditional genko-yoshi-style Japanese texts
+  # with "jisage" (indentation) marking each new paragraph, probably
+  # the best way is to make your own Part instance to give to this method,
+  # where the rule for the Part should be something like:
+  #   /(\A[[:blank:]]+|\n[[:space:]]+)/
   #
   # @param prt [PlainText:Part, String] {Part} or String to examine.
   # @param preserve_paragraph: [Boolean] Paragraphs are taken into account if true (Def: False). In the input, paragraphs are defined to be separated with more than one +lb+ with potentially some space characters in between. Their output style is specified with +boundary_style+.
   # @param boundary_style: [String, Symbol] One of +(:truncate|:truncate2|:delete|:none)+ or String. If String, the boundaries between paragraphs are replaced with this String (Def: +lb_out*2+).  If +:truncate+, consecutive linebreaks and spaces are truncated into 2 linebreaks.   +:truncate2+ are similar, but they are not truncated beyond 3 linebreaks (ie., up to 2 blank lines between Paragraphs). If +:none+, nothing is done about them. Unless :none, all the white spaces between linebreaks are deleted.
   # @param lbs_style: [Symbol] One of +(:truncate|:delete|:none)+ (Def: +:truncate+).  If :delete, all the linebreaks within paragraphs are deleted.  +:truncate+ is meaningful only when +preserve_paragraph=false+ and consecutive linebreaks are truncated into 1 linebreak.
-  # @param sps_style: [Symbol] One of +(:truncate|:delete|:none)+ (Def: +:truncate+).  If +:truncate+, the consecutive white spaces within paragraphs are truncated into a single white space. If :delete, they are deleted.
+  # @param sps_style: [Symbol] One of +(:truncate|:delete|:none)+ (Def: +:truncate+).  If +:truncate+, the consecutive white spaces within paragraphs, *except* for those at the line-head or line-tail (which are controlled by +linehead_style+ and +linetail_style+, respectively), are truncated into a single white space. If :delete, they are deleted.
   # @param lb_is_space: [Boolean] If true, a line-break, except those for the boundaries (unless +preserve_paragraph+ is false), is equivalent to a space (Def: False).
   # @param delete_asian_space: [Boolean] Any spaces between, before, after Asian characters (but punctuation) are deleted, if true (Default).
-  # @param linehead_style: [Symbol] One of +(:truncate|:delete|:none)+ (Def: :delete). Determine how to handle consecutive white spaces at the beggining of each line.
-  # @param linetail_style: [Symbol] One of +(:truncate|:delete|:markdown|:none)+ (Def: :delete). Determine how to handle consecutive white spaces at the end of each line.  If +:markdown:, two spaces at the end are preserved, whereas one or more than 2 consecutive spaces are deleted.
-  # @param firstsps_style: [Symbol, String] One of +(:truncate|:delete|:none)+ or String (Def: :default). If +:truncate+, any of white spaces and linebreaks at the very beginning of self, if exist, are truncated to a single white space (different from +lastsps_style+).  If String, they are, even if not exists, replaced with the specified String (such as a linebreak).  If +:delete+, they are deleted.
-  # @param lastsps_style: [Symbol, String] One of +(:truncate|:delete|:none|:linebreak)+ or String (Def: :truncate). If +:truncate+, any of white spaces and linebreaks at the very beginning of self, if exist, are truncated to a single white space (different from +firstsps_style+).  If +:delete+, they are deleted.  If String, they are, even if not exists, replaced with the specified String (such as a linebreak).  If +:linebreak+, +lb_out+ is used as String (i.e., only 1 linebreak always exists).
+  # @param linehead_style: [Symbol] One of +(:truncate|:delete|:none)+ (Def: :none). Determine how to handle consecutive white spaces at the beginning of each line.
+  # @param linetail_style: [Symbol] One of +(:truncate|:delete|:markdown|:none)+ (Def: :delete). Determine how to handle consecutive white spaces at the end of each line.  If +:markdown+, 1 space is always deleted, and two or more spaces are truncated into two ASCII whitespaces *if* the last two spaces are ASCII whitespaces, or else untouched.
+  # @param firstlbs_style: [Symbol, String] One of +(:truncate|:delete|:none)+ or String (Def: :delete). If +:truncate+, any linebreaks at the very beginning of self (and whitespaces in between), if they exist, are truncated to a single linebreak.  If String, they are, even if none exist, replaced with the specified String (such as a linebreak).  If +:delete+, they are deleted.  Note that this option has nothing to do with the whitespaces at the beginning of the first significant line (hence the name of the option).  Note that if a (random) Part is given, this option only considers the first significant element of it.
+  # @param lastsps_style: [Symbol, String] One of +(:truncate|:delete|:none|:linebreak)+ or String (Def: :truncate). If +:truncate+, any of linebreaks *AND* white spaces at the tail of self, if exist, are truncated to a single linebreak.  If +:delete+, they are deleted.  If String, they are, even if not exists, replaced with the specified String (such as a linebreak, in which case +lb_out+ is used as String, i.e., it guarantees only 1 linebreak to exist at the end of the String).  Note if a (random) Part is given, this option only considers the last significant element of it.
   # @param lb: [String] Linebreak character like +\n+ etc (Default: $/). If this is one of the standard line-breaks, irregular line-breaks (for example, existence of CR when only LF should be there) are corrected.
   # @param lb_out: [String] Linebreak used for output (Default: +lb+)
   # @return same as prt
@@ -110,16 +117,16 @@ module PlainText
         lb_is_space: false,
         sps_style: :truncate,
         delete_asian_space: true,
-        linehead_style: :delete,
+        linehead_style: :none,
         linetail_style: :delete,
-        firstsps_style: :delete,
+        firstlbs_style: :delete,
         lastsps_style:  :truncate,
         lb: $/,
         lb_out: nil,           # If unspecified, will be replaced with lb
         is_debug: false
       )
-#isdebug = true if prt == "\n  ab\n  \ncd\n \n  \n ef\n \n  \n   \n  gh\n \n \n \n" #DEBUG
+isdebug = true if prt == "\n  \n abc\n\n \ndef\n\n \n\n"
     lb_out ||= lb  # Output linebreak
     boundary_style = lb_out*2 if true       == boundary_style
     boundary_style = ""       if [:delete, :d].include? boundary_style
@@ -128,7 +135,12 @@ module PlainText
     if !prt.class.method_defined? :last_significant_element
       # Construct a Part instance from the given String.
       ret = ''
-      prt = prt.unicode_normalize
+      begin
+        prt = prt.unicode_normalize
+      rescue ArgumentError  # (invalid byte sequence in UTF-8)
+        warn "The given String in (#{self.name}\##{__method__}) seems wrong."
+        raise
+      end
       prt = normalize_lb(prt, "\n", lb_from: (DefLineBreaks.include?(lb) ? nil : lb)).dup
       kwd = (["\r\n", "\r", "\n"].include?(lb) ? {} : { rules: /#{Regexp.quote lb}{2,}/})
       prt = (preserve_paragraph ? Part.parse(prt, **kwd) : Part.new([prt]))
@@ -148,6 +160,7 @@ module PlainText
       lb_is_space: lb_is_space,
       sps_style: sps_style,
       delete_asian_space: delete_asian_space,
+      is_debug: is_debug
     )
     # Handles the line head/tails.
     clean_text_line_head_tail!( prt,
@@ -157,8 +170,9 @@ module PlainText
     # Handles the file head/tail.
     clean_text_file_head_tail!( prt,
-      firstsps_style: firstsps_style,
+      firstlbs_style: firstlbs_style,
       lastsps_style:  lastsps_style,
+      is_debug: isdebug
     )
     # Replaces the linebreaks to the specified one
@@ -284,29 +298,29 @@ module PlainText
       )
     # Linebreaks and spaces
-    [[lbs_style, "\n", "\n"], [sps_style, '[[:blank:]]', " "]].each do |ea|
-      #          FROM  TO                     FROM       TO
-      case ea[0]
-      when :truncate, :t
-        prt.parts.each{|ec| ec.gsub!(/#{ea[1]}{2,}/m, ea[2])}
-      when :delete, :d
-        prt.parts.each{|ec| ec.gsub!(/#{ea[1]}/m, "")}
-      when :none, :n
-      else
-        raise ArgumentError
-      end
+    case lbs_style
+    when :truncate,   :t
+      prt.parts.each{|ec| ec.gsub!(/\n{2,}/m, "\n")}
+    when :delete,   :d
+      prt.parts.each{|ec| ec.gsub!(/\n/m, "")}
+    when :none, :n
+      # Does nothing
+    else
+      raise ArgumentError
     end
+    # Handles spaces in each line
+    clean_text_sps!(prt, sps_style: sps_style, is_debug: is_debug)
     # Linebreaks become spaces
     if lb_is_space
       prt.parts.each{|ec| ec.gsub!(/\n/m, " ")}
-      prt.parts.each{|ec| ec.gsub!(/\n{2,}/m, "\n")} if lbs_style == :truncate
+      clean_text_sps!(prt, sps_style: sps_style, is_debug: is_debug) if sps_style == :truncate
     end
     # Ignore spaces between, before, and after Asian characters.
     if delete_asian_space
-      # prt.map_parts do |ea_p|
-      prt.parts.each do |ea_p|
+      prt.parts.each do |ea_p|
         PlainText.extend_this(ea_p)
         ea_p.delete_spaces_bw_cjk_european!  # Destructive change in prt.
       end
@@ -322,7 +336,7 @@ module PlainText
   # @see Plaintext.clean_text
   def self.clean_text_line_head_tail!(
         prt,
-        linehead_style: :delete,
+        linehead_style: :none,
         linetail_style: :delete,
         is_debug: false
       )
@@ -348,7 +362,7 @@ module PlainText
     when :markdown, :m
       # Two spaces are preserved
       prt.parts.each{|ec| ec.gsub!(/(?:^|(?<![[:blank:]]))[[:blank:]]$/, "")}  # A single space is deleted.
-      prt.parts.each{|ec| ec.gsub!(/[[:blank:]]*  $/, "  ")}  # 3 or more spaces are truncated into 2 spaces, only IF the last two spaces are the ASCII spaces.
+      prt.parts.each{|ec| ec.gsub!(/[[:blank:]]+  $/, "  ")}  # 3 or more spaces are truncated into 2 spaces, only IF the last two spaces are the ASCII spaces.
     when :none, :n
       # Do nothing
     else
@@ -358,36 +372,47 @@ module PlainText
   private_class_method :clean_text_line_head_tail!
   # @param prt [PlainText:Part] (see Plaintext.clean_text#prt)
-  # @param firstsps_style [Symbol, String] (see Plaintext.clean_text#firstsps_style)
+  # @param firstlbs_style [Symbol, String] (see Plaintext.clean_text#firstlbs_style)
   # @param lastsps_style [Symbol, String]  (see Plaintext.clean_text#lastsps_style)
   # @return [void]
   #
   # @see Plaintext.clean_text
   def self.clean_text_file_head_tail!(
         prt,
-        firstsps_style: :delete,
+        firstlbs_style: :delete,
         lastsps_style:  :truncate,
         is_debug: false
       )
     # Handles the beginning of the given Part.
-    obj = prt.first_significant_element
-    # The first significant element is either Paragraph or Background.  Either way,
-    # the beginning of the next element would not have any [[:space:]].
+    obj = prt.first_significant_element || return
+    # The first significant element is either Paragraph or Background.
+    # obj may be nil.
-    case firstsps_style
+    case firstlbs_style
     when String
-      obj.sub!(/\A[[:space:]]*/m, firstsps_style)
+      # This assumes the first Background is not
+      #   (1) containing any non-space characters,
+      #   (2) white-spaces only AND the first Paragraph starts from a linebreak.
+      # You can assume it as long as String is the original input.
+      # However, if the input is Part, anything can be possible, like
+      # first multiple Backgrounds contain a linebreak for each, each of which
+      # follows an empty Paragraph...
+      #  The thing is, if String is always returned, it is much easier
+      # to process after Part#join.  However, the method may return Part.
+      # Therefore, you cannot do it!
+      # I explain it in the document in {self.clean_text}.
+      obj.sub!(/\A([[:space:]]*\n)?/m, firstlbs_style)
     when :truncate, :t
-      # The initial blank lines, if exist, are truncated to a single " "
-      obj.sub!(/\A[[:space:]]+/m, " ")
+      # The initial blank lines, if exist, are truncated to a single "\n"
+      obj.sub!(/\A[[:space:]]*\n/m, "\n")
     when :delete, :d
-      # The initial blank lines and white spaces are deleted.
+      # The initial blank lines are deleted.
       obj.sub!(/\A[[:space:]]*\n/m, "")
     when :none, :n
       # Do nothing
     else
-      raise ArgumentError, "Invalid firstsps_style (#{firstsps_style.inspect}) is specified."
+      raise ArgumentError, "Invalid firstlbs_style (#{firstlbs_style.inspect}) is specified."
     end
     # Handles the end of the given Part.
@@ -423,6 +448,43 @@ module PlainText
   private_class_method :clean_text_file_head_tail!
+  # Handles spaces within Paragraphs
+  #
+  # uses Part to transform a Paragraph into a Part
+  #
+  # @param prt [PlainText:Part] (see Plaintext.clean_text#prt)
+  # @param sps_style (see Plaintext.clean_text#sps_style)
+  # @return [void]
+  #
+  # @see Plaintext.clean_text
+  def self.clean_text_sps!(
+        prt,
+        sps_style: :truncate,
+        is_debug: false
+      )
+    prt.parts.each do |e_pa|
+      ru = ParseRule
+      # Each line treated as a Paragraph, and [[:space:]]+ between them as a Boundary.
+      # Then, to work on anything within a line except for line-head/tail is easy.
+      prt_para = Part.parse(e_pa, rule: ParseRule::RuleEachLineStrip).map_parts { |e_li|
+        case sps_style
+        when :truncate, :t
+          e_li.gsub(/[[:blank:]]{2,}/m, " ")
+        when :delete, :d
+          e_li.gsub(/[[:blank:]]+/m, "")
+        when :none, :n
+          e_li
+        else
+          raise ArgumentError
+        end
+      } # map_parts
+      e_pa.replace prt_para.join
+    end
+  end
+  private_class_method :clean_text_sps!
   ####################################################
   # Instance methods
   ####################################################
@@ -438,6 +500,7 @@ module PlainText
   # @return [Integer]
   def count_char(*rest,
                  lbs_style: :delete,
+                 linehead_style: :delete,
                  lastsps_style: :none,
                  lb_out: "\n",
                  **k)

data/plain_text.gemspec CHANGED Viewed

@@ -1,17 +1,19 @@
 # -*- encoding: utf-8 -*-
 require 'rake'
+require 'date'
 Gem::Specification.new do |s|
-  s.name = %q{plain_text}
-  s.version = "0.1"
+  s.name = %q{plain_text}.sub(/.*/){|c| (c == File.basename(Dir.pwd)) ? c : raise("ERROR: s.name=(#{c}) in gemspec seems wrong!")}
+  s.version = "0.2"
   # s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   %w(countchar).each do |f|
-    s.executables << f
+    path = s.bindir+'/'+f
+    File.executable?(path) ? s.executables << f : raise("ERROR: Executable (#{path}) is not executable!")
   end
   s.bindir = 'bin'
   s.authors = ["Masa Sakano"]
-  s.date = %q{2019-10-25}
+  s.date = %q{2019-10-27}.sub(/.*/){|c| (Date.parse(c) == Date.today) ? c : raise("ERROR: s.date=(#{c}) is not today!")}
   s.summary = %q{Module to handle Plain-Text}
   s.description = %q{This module provides utility functions and methods to handle plain text, classes Part/Paragraph/Boundary to represent the logical structure of a document and ParseRule to describe the rules to parse plain text to produce a Part-type Ruby instance.}
   # s.email = %q{abc@example.com}

data/test/test_plain_text.rb CHANGED Viewed

@@ -77,9 +77,9 @@ class TestUnitPlainText < MiniTest::Test
   def test_clean_text03
     assert_raises(ArgumentError){ PT.clean_text("abc", boundary_style: nil) }
     s1  = "abc  \n  \n  def\n\n"
-    s20 = "abc\n  \ndef\n"
-    s21 = "abcXYZdefXYZ"
-    s22 = "abc\n\ndef\n"
+    s20 = "abc\n  \n  def\n"
+    s21 = "abcXYZ  defXYZ"
+    s22 = "abc\n\n  def\n"
     sr = PT.clean_text(s1, boundary_style: :none)
     assert_equal s20, sr, prerr(s20, sr)
     sr = PT.clean_text(s1, boundary_style: "XYZ")
@@ -105,36 +105,46 @@ class TestUnitPlainText < MiniTest::Test
     assert_equal s22, sr, prerr(s22, sr)
     s3  = "\nabc\n\ndef"
-    s41 =  " abc\n\ndefTT"
+    s41 = "\nabc\n\ndefTT"
     s42 = "\nabc\n\ndef"
-    sr = PT.clean_text(s3, firstsps_style: :truncate, lastsps_style: 'TT')
+    sr = PT.clean_text(s3, firstlbs_style: :truncate, lastsps_style: 'TT')
     assert_equal s41, sr, prerr(s41, sr)
-    sr = PT.clean_text(s3, firstsps_style: :none,     lastsps_style: :delete)
+    sr = PT.clean_text(s3, firstlbs_style: :none,     lastsps_style: :delete)
     assert_equal s42, sr, prerr(s42, sr)
   end
   def test_clean_text_boundary01
-    assert_raises(ArgumentError){ PT.clean_text("abc", boundary_style: nil) }
     s1  = "\n  ab\n  \ncd\n \n  \n ef\n \n  \n   \n  gh\n \n \n \n"
-    s21 =    " ab\n  \ncd\n \n  \n ef\n \n  \n   \n  gh\n"
+    s21 = "\n  ab\n  \ncd\n \n  \n ef\n \n  \n   \n  gh\n"
     s22 = "\n  ab\n\ncd\n\n ef\n\n  gh\n\n"
-    s23 =  "\n ab\n\ncd\n\n\n ef\n\n\n gh\n\n\n"
-    sr = PT.clean_text(s1, boundary_style: :n,  lastsps_style: :t, linehead_style: :n, firstsps_style: :t, sps_style: :n)
+    s23 = "\n ab\n\ncd\n\n\n ef\n\n\n gh\n\n\n"
+    sr = PT.clean_text(s1, boundary_style: :n,  lastsps_style: :t, linehead_style: :n, firstlbs_style: :t, sps_style: :n)
     assert_equal s21, sr, prerr(s21, sr)
-    sr = PT.clean_text(s1, boundary_style: :t,  lastsps_style: :n, linehead_style: :n, firstsps_style: :n, sps_style: :n)
+    sr = PT.clean_text(s1, boundary_style: :t,  lastsps_style: :n, linehead_style: :n, firstlbs_style: :n, sps_style: :n)
     assert_equal s22, sr, prerr(s22, sr)
-    sr = PT.clean_text(s1, boundary_style: :t2, lastsps_style: :n, linehead_style: :t, firstsps_style: :n, sps_style: :n)
+    sr = PT.clean_text(s1, boundary_style: :t2, lastsps_style: :n, linehead_style: :t, firstlbs_style: :n, sps_style: :n)
     assert_equal s23, sr, prerr(s23, sr)
   end
+  def test_clean_text_markdown01
+    s0  = "\n  ab \n \n   cd  \n \n\n ef   \n \ngh   \t \n\nij \t  \n\nkl   \u3000 \n\nmn"
+    s21 =     "\n  ab\n\n   cd  \n\n ef  \n\ngh   \t \n\nij  \n\nkl   \u3000 \n\nmn"
+    s22 =       "  ab\n\n   cd  \n\n ef  \n\ngh   \t \n\nij  \n\nkl   \u3000 \n\nmn"
+    sr = PT.clean_text(s0, linehead_style: :n, linetail_style: :m, firstlbs_style: :none)
+    assert_equal s21, sr, prerr(s21, sr)
+    sr = PT.clean_text(s0, linehead_style: :n, linetail_style: :m, firstlbs_style: :delete)
+    assert_equal s22, sr, prerr(s22, sr)
+  end
   def test_clean_text_part01
     s0  = "\n  \n abc\n\n \ndef\n\n \n\n"
-    s1  = "TTabc\n\ndef\n"
+    s1  = "TT abc\n\ndef\n"
     p00 = PT::Part.parse s0
     p0  = PT::Part.parse s0
-    sr = PT.clean_text(s0, firstsps_style: 'TT')
+    sr = PT.clean_text(s0, firstlbs_style: 'TT')
     assert_equal s1, sr, prerr(s1, sr)
-    sr = PT.clean_text(p0, firstsps_style: 'TT')
+    sr = PT.clean_text(p0, firstlbs_style: 'TT')
     assert_equal PT::Part, sr.class
     assert_equal s1,  sr.join
     assert_equal p00, p0, prerr(p00, p0)  # p0 is deepcopied?

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: plain_text
 version: !ruby/object:Gem::Version
-  version: '0.1'
+  version: '0.2'
 platform: ruby
 authors:
 - Masa Sakano
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-10-25 00:00:00.000000000 Z
+date: 2019-10-27 00:00:00.000000000 Z
 dependencies: []
 description: This module provides utility functions and methods to handle plain text,
   classes Part/Paragraph/Boundary to represent the logical structure of a document
@@ -23,6 +23,7 @@ extra_rdoc_files:
 files:
 - ".gitignore"
 - ChangeLog
+- LICENSE.txt
 - Makefile
 - README.en.rdoc
 - Rakefile
@@ -60,8 +61,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.7.3
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Module to handle Plain-Text