string_splitter 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 128e1b2cc29cb122f3d5040f7c9e115688532c7e68c59f0a5373291e995642f9
4
- data.tar.gz: 6b8729b7fb59aa984c1940ff0f9a1a308dded8b77c7db2b3c2a2ad4cdbd8bd52
3
+ metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
4
+ data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
5
5
  SHA512:
6
- metadata.gz: 3aa949fb5ac46369af379e2fd28bc18f7c93746515aea76005d606a4cb9f20426dec353bef8406264078cdccf89cdaca99902d961687f12fb72be08d9f2b0072
7
- data.tar.gz: acd982d39a003be78b4548992cf108141e89e51a58f918e10515fb01dd1fd562db319e5c0dc5475d3e74739726e1fd752bbee4850b823705fb9520ff6b05e99f
6
+ metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
7
+ data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.1 - 2018-06-24
2
+
3
+ - remove trailing empty field when the separator is empty (#1)
4
+
1
5
  ## 0.3.0 - 2018-06-23
2
6
 
3
7
  - **breaking change**: rename the `default_separator` option to `default_delimiter`
data/README.md CHANGED
@@ -179,7 +179,7 @@ ss.split(line, at: [1..5, 8])
179
179
 
180
180
  # VERSION
181
181
 
182
- 0.3.0
182
+ 0.3.1
183
183
 
184
184
  # SEE ALSO
185
185
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class StringSplitter
4
- VERSION = '0.3.0'
4
+ VERSION = '0.3.1'
5
5
  end
@@ -137,68 +137,114 @@ class StringSplitter
137
137
  end
138
138
 
139
139
  ncaptures = match.captures.length
140
+ delimiter = increment_backrefs(delimiter, ncaptures)
141
+ parts = string.split(/(#{delimiter})/, -1)
142
+ remove_trailing_empty_field!(parts, ncaptures)
143
+ result, splits = splits_for(parts, ncaptures)
144
+ count = splits.length
145
+ block ||= at ? match_positions(at, count) : ACCEPT
140
146
 
141
- if delimiter.is_a?(Regexp) && ncaptures > 0
142
- # increment back-references so they remain valid when the outer capture
143
- # is added e.g. to split on:
144
- #
145
- # - <foo-comment> ... </foo-comment>
146
- # - <bar-comment> ... </bar-comment>
147
- #
148
- # etc.
149
- #
150
- # before:
151
- #
152
- # %r| <(\w+-comment)> [^<]* </\1> |x
153
- #
154
- # after:
155
- #
156
- # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
147
+ [result, block, splits, count, -1]
148
+ end
157
149
 
150
+ # increment back-references so they remain valid when the outer capture
151
+ # is added.
152
+ #
153
+ # e.g. to split on:
154
+ #
155
+ # - <foo-comment> ... </foo-comment>
156
+ # - <bar-comment> ... </bar-comment>
157
+ #
158
+ # etc.
159
+ #
160
+ # before:
161
+ #
162
+ # %r| <(\w+-comment)> [^<]* </\1> |x
163
+ #
164
+ # after:
165
+ #
166
+ # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
167
+
168
+ def increment_backrefs(delimiter, ncaptures)
169
+ if delimiter.is_a?(Regexp) && ncaptures > 0
158
170
  delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
159
171
  match = Regexp.last_match
160
172
  match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
161
173
  end
162
174
  end
163
175
 
164
- parts = string.split(/(#{delimiter})/, -1)
165
- result, splits = splits_for(parts, ncaptures)
166
- count = splits.length
176
+ delimiter
177
+ end
167
178
 
168
- unless block
169
- if at
170
- at = Array(at).map do |index|
171
- if index.is_a?(Integer) && index.negative?
172
- # translate 1-based negative indices to 1-based positive
173
- # indices e.g:
174
- #
175
- # ss.split("foo:bar:baz:quux", ":", at: -1)
176
- #
177
- # translates to:
178
- #
179
- # ss.split("foo:bar:baz:quux", ":", at: 3)
180
- #
181
- # XXX note: we don't use modulo, because we don't want
182
- # out-of-bounds indices to silently work e.g. we don't want:
183
- #
184
- # ss.split("foo:bar:baz:quux", ":", -42)
185
- #
186
- # to mysteriously match when the index is 2
187
-
188
- count + 1 + index
189
- else
190
- index
191
- end
192
- end
179
+ # work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
180
+ # on an empty string/pattern without removing trailing empty fields e.g.:
181
+ #
182
+ # "foobar".split("", -1)
183
+ # "foobar".split(//, -1)
184
+ # # => ["f", "o", "o", "b", "a", "r", ""]
185
+ #
186
+ # "foobar".split(/()/, -1)
187
+ # # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
188
+ #
189
+ # "foobar".split(/(())/, -1)
190
+ # # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
191
+ #
192
+ # *there is no such thing as an empty field whose separator is empty*, so
193
+ # if String#split's result ends with an empty separator, 0 or more (empty)
194
+ # captures and an empty field, we can safely remove them.
195
+
196
+ def remove_trailing_empty_field!(parts, ncaptures)
197
+ # the trailing field is at index -1. if there are 0 captures, the separator
198
+ # is at -2:
199
+ #
200
+ # [empty_separator, empty_field]
201
+ #
202
+ # if there is 1 capture, the separator is at -3:
203
+ #
204
+ # [empty_separator, capture, empty_field]
205
+ #
206
+ # etc. therefore we find the separator by walking back
207
+ #
208
+ # 1 (empty field)
209
+ # + ncaptures
210
+ # + 1 (separator)
211
+ #
212
+ # steps from the end of the array i.e. ncaptures + 2
213
+ count = ncaptures + 2
214
+ separator_index = count * -1
215
+
216
+ return unless parts[-1].empty? && parts[separator_index].empty?
217
+
218
+ # drop the empty separator, the (empty) captures, and the trailing empty field
219
+ parts.pop(count)
220
+ end
193
221
 
194
- block = lambda do |split|
195
- case split.position when *at then true else false end
196
- end
222
+ def match_positions(positions, nsplits)
223
+ positions = Array(positions).map do |position|
224
+ if position.is_a?(Integer) && position.negative?
225
+ # translate negative (from-the-end) positions into the equivalent 1-based positions counted from the start, e.g.:
226
+ #
227
+ # ss.split("foo:bar:baz:quux", ":", at: -1)
228
+ #
229
+ # translates to:
230
+ #
231
+ # ss.split("foo:bar:baz:quux", ":", at: 3)
232
+ #
233
+ # XXX note: we don't use modulo, because we don't want
234
+ # out-of-bounds indices to silently work e.g. we don't want:
235
+ #
236
+ # ss.split("foo:bar:baz:quux", ":", at: -42)
237
+ #
238
+ # to mysteriously match when the position is 2
239
+
240
+ nsplits + 1 + position
197
241
  else
198
- block = ACCEPT
242
+ position
199
243
  end
200
244
  end
201
245
 
202
- [result, block, splits, count, -1]
246
+ lambda do |split|
247
+ case split.position when *positions then true else false end
248
+ end
203
249
  end
204
250
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - chocolateboy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-23 00:00:00.000000000 Z
11
+ date: 2018-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: values