RubyGems - string_splitter - Versions diffs - 0.3.0 → 0.3.1 - Mend

string_splitter 0.3.0 → 0.3.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +4 -0
data/README.md +1 -1
data/lib/string_splitter/version.rb +1 -1
data/lib/string_splitter.rb +95 -49
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 128e1b2cc29cb122f3d5040f7c9e115688532c7e68c59f0a5373291e995642f9
-  data.tar.gz: 6b8729b7fb59aa984c1940ff0f9a1a308dded8b77c7db2b3c2a2ad4cdbd8bd52
+  metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
+  data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
 SHA512:
-  metadata.gz: 3aa949fb5ac46369af379e2fd28bc18f7c93746515aea76005d606a4cb9f20426dec353bef8406264078cdccf89cdaca99902d961687f12fb72be08d9f2b0072
-  data.tar.gz: acd982d39a003be78b4548992cf108141e89e51a58f918e10515fb01dd1fd562db319e5c0dc5475d3e74739726e1fd752bbee4850b823705fb9520ff6b05e99f
+  metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
+  data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,7 @@
+## 0.3.1 - 2018-06-24
+- remove trailing empty field when the separator is empty (#1)
 ## 0.3.0 - 2018-06-23
 - **breaking change**: rename the `default_separator` option to `default_delimiter`

data/README.md CHANGED Viewed

@@ -179,7 +179,7 @@ ss.split(line, at: [1..5, 8])
 # VERSION
-0.3.0
+0.3.1
 # SEE ALSO

data/lib/string_splitter/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class StringSplitter
-  VERSION = '0.3.0'
+  VERSION = '0.3.1'
 end

data/lib/string_splitter.rb CHANGED Viewed

@@ -137,68 +137,114 @@ class StringSplitter
     end
     ncaptures = match.captures.length
+    delimiter = increment_backrefs(delimiter, ncaptures)
+    parts = string.split(/(#{delimiter})/, -1)
+    remove_trailing_empty_field!(parts, ncaptures)
+    result, splits = splits_for(parts, ncaptures)
+    count = splits.length
+    block ||= at ? match_positions(at, count) : ACCEPT
-    if delimiter.is_a?(Regexp) && ncaptures > 0
-      # increment back-references so they remain valid when the outer capture
-      # is added e.g. to split on:
-      #
-      #   - <foo-comment> ... </foo-comment>
-      #   - <bar-comment> ... </bar-comment>
-      #
-      # etc.
-      #
-      # before:
-      #
-      #   %r|   <(\w+-comment)> [^<]* </\1>   |x
-      #
-      # after:
-      #
-      #   %r| ( <(\w+-comment)> [^<]* </\2> ) |x
+    [result, block, splits, count, -1]
+  end
+  # increment back-references so they remain valid when the outer capture
+  # is added.
+  #
+  # e.g. to split on:
+  #
+  #   - <foo-comment> ... </foo-comment>
+  #   - <bar-comment> ... </bar-comment>
+  #
+  # etc.
+  #
+  # before:
+  #
+  #   %r|   <(\w+-comment)> [^<]* </\1>   |x
+  #
+  # after:
+  #
+  #   %r| ( <(\w+-comment)> [^<]* </\2> ) |x
+  def increment_backrefs(delimiter, ncaptures)
+    if delimiter.is_a?(Regexp) && ncaptures > 0
       delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
         match = Regexp.last_match
         match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
       end
     end
-    parts = string.split(/(#{delimiter})/, -1)
-    result, splits = splits_for(parts, ncaptures)
-    count = splits.length
+    delimiter
+  end
-    unless block
-      if at
-        at = Array(at).map do |index|
-          if index.is_a?(Integer) && index.negative?
-            # translate 1-based negative indices to 1-based positive
-            # indices e.g:
-            #
-            #   ss.split("foo:bar:baz:quux", ":", at: -1)
-            #
-            # translates to:
-            #
-            #   ss.split("foo:bar:baz:quux", ":", at: 3)
-            #
-            # XXX note: we don't use modulo, because we don't want
-            # out-of-bounds indices to silently work e.g. we don't want:
-            #
-            #   ss.split("foo:bar:baz:quux", ":", -42)
-            #
-            # to mysteriously match when the index is 2
-            count + 1 + index
-          else
-            index
-          end
-        end
+  # work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
+  # on an empty string/pattern without removing trailing empty fields e.g.:
+  #
+  #   "foobar".split("", -1)
+  #   "foobar".split(//, -1)
+  #   # => ["f", "o", "o", "b", "a", "r", ""]
+  #
+  #   "foobar".split(/()/, -1)
+  #   # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
+  #
+  #   "foobar".split(/(())/, -1)
+  #   # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
+  #
+  # *there is no such thing as an empty field whose separator is empty*, so
+  # if String#split's result ends with an empty separator, 0 or more (empty)
+  # captures and an empty field, we can safely remove them.
+  def remove_trailing_empty_field!(parts, ncaptures)
+    # the trailing field is at index -1. if there are 0 captures, the separator
+    # is at -2:
+    #
+    #   [empty_separator, empty_field]
+    #
+    # if there is 1 capture, the separator is at -3:
+    #
+    #   [empty_separator, capture, empty_field]
+    #
+    # etc. therefore we find the separator by walking back
+    #
+    #  1 (empty field)
+    #  + ncaptures
+    #  + 1 (separator)
+    #
+    # steps from the end of the array i.e. ncaptures + 2
+    count = ncaptures + 2
+    separator_index = count * -1
+    return unless parts[-1].empty? && parts[separator_index].empty?
+    # drop the empty separator, the (empty) captures, and the trailing empty field
+    parts.pop(count)
+  end
-        block = lambda do |split|
-          case split.position when *at then true else false end
-        end
+  def match_positions(positions, nsplits)
+    positions = Array(positions).map do |position|
+      if position.is_a?(Integer) && position.negative?
+        # translate negative indices to 1-based non-negative indices e.g:
+        #
+        #   ss.split("foo:bar:baz:quux", ":", at: -1)
+        #
+        # translates to:
+        #
+        #   ss.split("foo:bar:baz:quux", ":", at: 3)
+        #
+        # XXX note: we don't use modulo, because we don't want
+        # out-of-bounds indices to silently work e.g. we don't want:
+        #
+        #   ss.split("foo:bar:baz:quux", ":", -42)
+        #
+        # to mysteriously match when the position is 2
+        nsplits + 1 + position
       else
-        block = ACCEPT
+        position
       end
     end
-    [result, block, splits, count, -1]
+    lambda do |split|
+      case split.position when *positions then true else false end
+    end
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: string_splitter
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - chocolateboy
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-06-23 00:00:00.000000000 Z
+date: 2018-06-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: values