string_splitter 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/lib/string_splitter/version.rb +1 -1
- data/lib/string_splitter.rb +95 -49
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
|
4
|
+
data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
|
7
|
+
data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
data/lib/string_splitter.rb
CHANGED
@@ -137,68 +137,114 @@ class StringSplitter
|
|
137
137
|
end
|
138
138
|
|
139
139
|
ncaptures = match.captures.length
|
140
|
+
delimiter = increment_backrefs(delimiter, ncaptures)
|
141
|
+
parts = string.split(/(#{delimiter})/, -1)
|
142
|
+
remove_trailing_empty_field!(parts, ncaptures)
|
143
|
+
result, splits = splits_for(parts, ncaptures)
|
144
|
+
count = splits.length
|
145
|
+
block ||= at ? match_positions(at, count) : ACCEPT
|
140
146
|
|
141
|
-
|
142
|
-
|
143
|
-
# is added e.g. to split on:
|
144
|
-
#
|
145
|
-
# - <foo-comment> ... </foo-comment>
|
146
|
-
# - <bar-comment> ... </bar-comment>
|
147
|
-
#
|
148
|
-
# etc.
|
149
|
-
#
|
150
|
-
# before:
|
151
|
-
#
|
152
|
-
# %r| <(\w+-comment)> [^<]* </\1> |x
|
153
|
-
#
|
154
|
-
# after:
|
155
|
-
#
|
156
|
-
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
147
|
+
[result, block, splits, count, -1]
|
148
|
+
end
|
157
149
|
|
150
|
+
# increment back-references so they remain valid when the outer capture
|
151
|
+
# is added.
|
152
|
+
#
|
153
|
+
# e.g. to split on:
|
154
|
+
#
|
155
|
+
# - <foo-comment> ... </foo-comment>
|
156
|
+
# - <bar-comment> ... </bar-comment>
|
157
|
+
#
|
158
|
+
# etc.
|
159
|
+
#
|
160
|
+
# before:
|
161
|
+
#
|
162
|
+
# %r| <(\w+-comment)> [^<]* </\1> |x
|
163
|
+
#
|
164
|
+
# after:
|
165
|
+
#
|
166
|
+
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
167
|
+
|
168
|
+
# Shift every numeric back-reference in a Regexp delimiter up by one so
# that it still points at the same group once the delimiter is wrapped
# in an additional outer capture group.
#
# delimiter - the delimiter to adjust
# ncaptures - number of capture groups in the delimiter
#
# Returns the rewritten pattern source (a String) when the delimiter is a
# Regexp and has at least one capture; otherwise returns the delimiter
# untouched.
def increment_backrefs(delimiter, ncaptures)
  return delimiter unless delimiter.is_a?(Regexp) && ncaptures > 0

  # the second alternative consumes any other escaped character (including
  # an escaped backslash), so digits following "\\\\" are never mistaken for
  # a back-reference
  delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
    number = Regexp.last_match(1)
    number ? "\\#{number.to_i + 1}" : Regexp.last_match(0)
  end
end
|
167
178
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
179
|
+
# work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
|
180
|
+
# on an empty string/pattern without removing trailing empty fields e.g.:
|
181
|
+
#
|
182
|
+
# "foobar".split("", -1)
|
183
|
+
# "foobar".split(//, -1)
|
184
|
+
# # => ["f", "o", "o", "b", "a", "r", ""]
|
185
|
+
#
|
186
|
+
# "foobar".split(/()/, -1)
|
187
|
+
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
188
|
+
#
|
189
|
+
# "foobar".split(/(())/, -1)
|
190
|
+
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
191
|
+
#
|
192
|
+
# *there is no such thing as an empty field whose separator is empty*, so
|
193
|
+
# if String#split's result ends with an empty separator, 0 or more (empty)
|
194
|
+
# captures and an empty field, we can safely remove them.
|
195
|
+
|
196
|
+
# Remove the spurious trailing group that String#split (with a negative
# limit) appends when the separator is empty, i.e. an empty separator,
# its (empty) captures, and an empty field at the end of the array.
#
# parts     - the array of fields, separators and captures; mutated in place
# ncaptures - number of capture groups in the original delimiter
#
# Returns the removed elements if a trailing group was dropped, nil otherwise.
def remove_trailing_empty_field!(parts, ncaptures)
  # the trailing field is at index -1. walking back from the end we pass
  #
  #     1 (empty field)
  #   + ncaptures
  #   + 1 (separator)
  #
  # elements, so the separator sits ncaptures + 2 steps from the end:
  #
  #   0 captures: [empty_separator, empty_field]
  #   1 capture:  [empty_separator, capture, empty_field]
  group_size = ncaptures + 2

  # an array shorter than one full group (e.g. the empty array) cannot end
  # with a trailing group; bail out rather than calling #empty? on nil
  return if parts.length < group_size

  return unless parts[-1].empty? && parts[-group_size].empty?

  # drop the empty separator, the (empty) captures, and the trailing empty field
  parts.pop(group_size)
end
|
193
221
|
|
194
|
-
|
195
|
-
|
196
|
-
|
222
|
+
# Build a predicate which accepts a split when its position matches one of
# the supplied positions.
#
# positions - a single position or a collection of them (coerced with
#             Array()); each is compared to the split's position with ===
# nsplits   - the total number of splits, used to resolve negative integers
#
# Returns a one-argument lambda yielding true or false.
def match_positions(positions, nsplits)
  resolved = Array(positions).map do |position|
    next position unless position.is_a?(Integer) && position.negative?

    # negative integers count back from the last split and are converted
    # to 1-based positions e.g. with 3 splits:
    #
    #   ss.split("foo:bar:baz:quux", ":", at: -1)
    #
    # behaves like:
    #
    #   ss.split("foo:bar:baz:quux", ":", at: 3)
    #
    # XXX deliberately no modulo here: an out-of-range index such as
    # at: -42 must not wrap around and silently match some valid position
    nsplits + 1 + position
  end

  ->(split) { resolved.any? { |matcher| matcher === split.position } }
end
|
204
250
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|