string_splitter 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/string_splitter/version.rb +1 -1
- data/lib/string_splitter.rb +95 -49
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
|
4
|
+
data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
|
7
|
+
data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/lib/string_splitter.rb
CHANGED
@@ -137,68 +137,114 @@ class StringSplitter
|
|
137
137
|
end
|
138
138
|
|
139
139
|
ncaptures = match.captures.length
|
140
|
+
delimiter = increment_backrefs(delimiter, ncaptures)
|
141
|
+
parts = string.split(/(#{delimiter})/, -1)
|
142
|
+
remove_trailing_empty_field!(parts, ncaptures)
|
143
|
+
result, splits = splits_for(parts, ncaptures)
|
144
|
+
count = splits.length
|
145
|
+
block ||= at ? match_positions(at, count) : ACCEPT
|
140
146
|
|
141
|
-
|
142
|
-
|
143
|
-
# is added e.g. to split on:
|
144
|
-
#
|
145
|
-
# - <foo-comment> ... </foo-comment>
|
146
|
-
# - <bar-comment> ... </bar-comment>
|
147
|
-
#
|
148
|
-
# etc.
|
149
|
-
#
|
150
|
-
# before:
|
151
|
-
#
|
152
|
-
# %r| <(\w+-comment)> [^<]* </\1> |x
|
153
|
-
#
|
154
|
-
# after:
|
155
|
-
#
|
156
|
-
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
147
|
+
[result, block, splits, count, -1]
|
148
|
+
end
|
157
149
|
|
150
|
+
# increment back-references so they remain valid when the outer capture
|
151
|
+
# is added.
|
152
|
+
#
|
153
|
+
# e.g. to split on:
|
154
|
+
#
|
155
|
+
# - <foo-comment> ... </foo-comment>
|
156
|
+
# - <bar-comment> ... </bar-comment>
|
157
|
+
#
|
158
|
+
# etc.
|
159
|
+
#
|
160
|
+
# before:
|
161
|
+
#
|
162
|
+
# %r| <(\w+-comment)> [^<]* </\1> |x
|
163
|
+
#
|
164
|
+
# after:
|
165
|
+
#
|
166
|
+
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
167
|
+
|
168
|
+
def increment_backrefs(delimiter, ncaptures)
|
169
|
+
if delimiter.is_a?(Regexp) && ncaptures > 0
|
158
170
|
delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
|
159
171
|
match = Regexp.last_match
|
160
172
|
match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
|
161
173
|
end
|
162
174
|
end
|
163
175
|
|
164
|
-
|
165
|
-
|
166
|
-
count = splits.length
|
176
|
+
delimiter
|
177
|
+
end
|
167
178
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
179
|
+
# work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
|
180
|
+
# on an empty string/pattern without removing trailing empty fields e.g.:
|
181
|
+
#
|
182
|
+
# "foobar".split("", -1)
|
183
|
+
# "foobar".split(//, -1)
|
184
|
+
# # => ["f", "o", "o", "b", "a", "r", ""]
|
185
|
+
#
|
186
|
+
# "foobar".split(/()/, -1)
|
187
|
+
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
188
|
+
#
|
189
|
+
# "foobar".split(/(())/, -1)
|
190
|
+
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
191
|
+
#
|
192
|
+
# *there is no such thing as an empty field whose separator is empty*, so
|
193
|
+
# if String#split's result ends with an empty separator, 0 or more (empty)
|
194
|
+
# captures and an empty field, we can safely remove them.
|
195
|
+
|
196
|
+
def remove_trailing_empty_field!(parts, ncaptures)
|
197
|
+
# the trailing field is at index -1. if there are 0 captures, the separator
|
198
|
+
# is at -2:
|
199
|
+
#
|
200
|
+
# [empty_separator, empty_field]
|
201
|
+
#
|
202
|
+
# if there is 1 capture, the separator is at -3:
|
203
|
+
#
|
204
|
+
# [empty_separator, capture, empty_field]
|
205
|
+
#
|
206
|
+
# etc. therefore we find the separator by walking back
|
207
|
+
#
|
208
|
+
# 1 (empty field)
|
209
|
+
# + ncaptures
|
210
|
+
# + 1 (separator)
|
211
|
+
#
|
212
|
+
# steps from the end of the array i.e. ncaptures + 2
|
213
|
+
count = ncaptures + 2
|
214
|
+
separator_index = count * -1
|
215
|
+
|
216
|
+
return unless parts[-1].empty? && parts[separator_index].empty?
|
217
|
+
|
218
|
+
# drop the empty separator, the (empty) captures, and the trailing empty field
|
219
|
+
parts.pop(count)
|
220
|
+
end
|
193
221
|
|
194
|
-
|
195
|
-
|
196
|
-
|
222
|
+
def match_positions(positions, nsplits)
|
223
|
+
positions = Array(positions).map do |position|
|
224
|
+
if position.is_a?(Integer) && position.negative?
|
225
|
+
# translate negative indices to 1-based non-negative indices e.g:
|
226
|
+
#
|
227
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
228
|
+
#
|
229
|
+
# translates to:
|
230
|
+
#
|
231
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
232
|
+
#
|
233
|
+
# XXX note: we don't use modulo, because we don't want
|
234
|
+
# out-of-bounds indices to silently work e.g. we don't want:
|
235
|
+
#
|
236
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
237
|
+
#
|
238
|
+
# to mysteriously match when the position is 2
|
239
|
+
|
240
|
+
nsplits + 1 + position
|
197
241
|
else
|
198
|
-
|
242
|
+
position
|
199
243
|
end
|
200
244
|
end
|
201
245
|
|
202
|
-
|
246
|
+
lambda do |split|
|
247
|
+
case split.position when *positions then true else false end
|
248
|
+
end
|
203
249
|
end
|
204
250
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|