string_splitter 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 128e1b2cc29cb122f3d5040f7c9e115688532c7e68c59f0a5373291e995642f9
4
- data.tar.gz: 6b8729b7fb59aa984c1940ff0f9a1a308dded8b77c7db2b3c2a2ad4cdbd8bd52
3
+ metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
4
+ data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
5
5
  SHA512:
6
- metadata.gz: 3aa949fb5ac46369af379e2fd28bc18f7c93746515aea76005d606a4cb9f20426dec353bef8406264078cdccf89cdaca99902d961687f12fb72be08d9f2b0072
7
- data.tar.gz: acd982d39a003be78b4548992cf108141e89e51a58f918e10515fb01dd1fd562db319e5c0dc5475d3e74739726e1fd752bbee4850b823705fb9520ff6b05e99f
6
+ metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
7
+ data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.1 - 2018-06-24
2
+
3
+ - remove trailing empty field when the separator is empty (#1)
4
+
1
5
  ## 0.3.0 - 2018-06-23
2
6
 
3
7
  - **breaking change**: rename the `default_separator` option to `default_delimiter`
data/README.md CHANGED
@@ -179,7 +179,7 @@ ss.split(line, at: [1..5, 8])
179
179
 
180
180
  # VERSION
181
181
 
182
- 0.3.0
182
+ 0.3.1
183
183
 
184
184
  # SEE ALSO
185
185
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class StringSplitter
4
- VERSION = '0.3.0'
4
+ VERSION = '0.3.1'
5
5
  end
@@ -137,68 +137,114 @@ class StringSplitter
137
137
  end
138
138
 
139
139
  ncaptures = match.captures.length
140
+ delimiter = increment_backrefs(delimiter, ncaptures)
141
+ parts = string.split(/(#{delimiter})/, -1)
142
+ remove_trailing_empty_field!(parts, ncaptures)
143
+ result, splits = splits_for(parts, ncaptures)
144
+ count = splits.length
145
+ block ||= at ? match_positions(at, count) : ACCEPT
140
146
 
141
- if delimiter.is_a?(Regexp) && ncaptures > 0
142
- # increment back-references so they remain valid when the outer capture
143
- # is added e.g. to split on:
144
- #
145
- # - <foo-comment> ... </foo-comment>
146
- # - <bar-comment> ... </bar-comment>
147
- #
148
- # etc.
149
- #
150
- # before:
151
- #
152
- # %r| <(\w+-comment)> [^<]* </\1> |x
153
- #
154
- # after:
155
- #
156
- # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
147
+ [result, block, splits, count, -1]
148
+ end
157
149
 
150
+ # increment back-references so they remain valid when the outer capture
151
+ # is added.
152
+ #
153
+ # e.g. to split on:
154
+ #
155
+ # - <foo-comment> ... </foo-comment>
156
+ # - <bar-comment> ... </bar-comment>
157
+ #
158
+ # etc.
159
+ #
160
+ # before:
161
+ #
162
+ # %r| <(\w+-comment)> [^<]* </\1> |x
163
+ #
164
+ # after:
165
+ #
166
+ # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
167
+
168
+ def increment_backrefs(delimiter, ncaptures)
169
+ if delimiter.is_a?(Regexp) && ncaptures > 0
158
170
  delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
159
171
  match = Regexp.last_match
160
172
  match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
161
173
  end
162
174
  end
163
175
 
164
- parts = string.split(/(#{delimiter})/, -1)
165
- result, splits = splits_for(parts, ncaptures)
166
- count = splits.length
176
+ delimiter
177
+ end
167
178
 
168
- unless block
169
- if at
170
- at = Array(at).map do |index|
171
- if index.is_a?(Integer) && index.negative?
172
- # translate 1-based negative indices to 1-based positive
173
- # indices e.g:
174
- #
175
- # ss.split("foo:bar:baz:quux", ":", at: -1)
176
- #
177
- # translates to:
178
- #
179
- # ss.split("foo:bar:baz:quux", ":", at: 3)
180
- #
181
- # XXX note: we don't use modulo, because we don't want
182
- # out-of-bounds indices to silently work e.g. we don't want:
183
- #
184
- # ss.split("foo:bar:baz:quux", ":", -42)
185
- #
186
- # to mysteriously match when the index is 2
187
-
188
- count + 1 + index
189
- else
190
- index
191
- end
192
- end
179
+ # work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
180
+ # on an empty string/pattern without removing trailing empty fields e.g.:
181
+ #
182
+ # "foobar".split("", -1)
183
+ # "foobar".split(//, -1)
184
+ # # => ["f", "o", "o", "b", "a", "r", ""]
185
+ #
186
+ # "foobar".split(/()/, -1)
187
+ # # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
188
+ #
189
+ # "foobar".split(/(())/, -1)
190
+ # # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
191
+ #
192
+ # *there is no such thing as an empty field whose separator is empty*, so
193
+ # if String#split's result ends with an empty separator, 0 or more (empty)
194
+ # captures and an empty field, we can safely remove them.
195
+
196
+ def remove_trailing_empty_field!(parts, ncaptures)
197
+ # the trailing field is at index -1. if there are 0 captures, the separator
198
+ # is at -2:
199
+ #
200
+ # [empty_separator, empty_field]
201
+ #
202
+ # if there is 1 capture, the separator is at -3:
203
+ #
204
+ # [empty_separator, capture, empty_field]
205
+ #
206
+ # etc. therefore we find the separator by walking back
207
+ #
208
+ # 1 (empty field)
209
+ # + ncaptures
210
+ # + 1 (separator)
211
+ #
212
+ # steps from the end of the array i.e. ncaptures + 2
213
+ count = ncaptures + 2
214
+ separator_index = count * -1
215
+
216
+ return unless parts[-1].empty? && parts[separator_index].empty?
217
+
218
+ # drop the empty separator, the (empty) captures, and the trailing empty field
219
+ parts.pop(count)
220
+ end
193
221
 
194
- block = lambda do |split|
195
- case split.position when *at then true else false end
196
- end
222
+ def match_positions(positions, nsplits)
223
+ positions = Array(positions).map do |position|
224
+ if position.is_a?(Integer) && position.negative?
225
+ # translate negative indices to 1-based non-negative indices e.g:
226
+ #
227
+ # ss.split("foo:bar:baz:quux", ":", at: -1)
228
+ #
229
+ # translates to:
230
+ #
231
+ # ss.split("foo:bar:baz:quux", ":", at: 3)
232
+ #
233
+ # XXX note: we don't use modulo, because we don't want
234
+ # out-of-bounds indices to silently work e.g. we don't want:
235
+ #
236
+ # ss.split("foo:bar:baz:quux", ":", at: -42)
237
+ #
238
+ # to mysteriously match when the position is 2
239
+
240
+ nsplits + 1 + position
197
241
  else
198
- block = ACCEPT
242
+ position
199
243
  end
200
244
  end
201
245
 
202
- [result, block, splits, count, -1]
246
+ lambda do |split|
247
+ case split.position when *positions then true else false end
248
+ end
203
249
  end
204
250
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - chocolateboy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-23 00:00:00.000000000 Z
11
+ date: 2018-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: values