string_splitter 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -11
- data/README.md +32 -29
- data/lib/string_splitter.rb +34 -29
- data/lib/string_splitter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 400534de6c3143ef81b2ad46a3a6432b7d83ef0900024ebdde3f06a4e1714890
|
4
|
+
data.tar.gz: 643f5af7b9e13321dfa97b045b124d0c5ea576868b13141c264122bc96baea5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35bed8fe69b33314813fbd68a8da0e8f4799b7891275ac601b157caeb0e0a3780f37ec7e7876d808b8dfcbfdf7527f45c3af0dc0d679e133865e96949a1d9ce3
|
7
|
+
data.tar.gz: 8186e40d57654daf1a481ab74c128910f7aa346bc343a0a9933dc39b7cceeb204c1a55ac39b39321df46f7d02420fd87f93dd4a708be0a985d94833df018da87
|
data/CHANGELOG.md
CHANGED
@@ -1,22 +1,29 @@
|
|
1
|
+
## 0.7.0 - 2020-08-21
|
2
|
+
|
3
|
+
#### Breaking Changes
|
4
|
+
|
5
|
+
- `String#split` incompatibility: we no longer trim the string (with
|
6
|
+
`String#strip`) before splitting if the delimiter is omitted
|
7
|
+
|
1
8
|
## 0.6.0 - 2020-08-20
|
2
9
|
|
3
10
|
#### Breaking Changes
|
4
11
|
|
5
12
|
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
6
|
-
unlike Ruby's `String#split
|
7
|
-
|
13
|
+
unlike Ruby's `String#split`, the former no longer strips the string before
|
14
|
+
splitting
|
8
15
|
- rename the `remove_empty` option `remove_empty_fields`
|
9
16
|
- rename the `exclude` option `except` (alias for `reject`)
|
10
17
|
|
11
|
-
#### Fixes
|
12
|
-
|
13
|
-
- correctly handle backreferences in delimiter patterns
|
14
|
-
|
15
18
|
#### Features
|
16
19
|
|
17
20
|
- add support for descending, negative, and infinite ranges,
|
18
21
|
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
19
22
|
|
23
|
+
#### Fixes
|
24
|
+
|
25
|
+
- correctly handle backreferences in delimiter patterns
|
26
|
+
|
20
27
|
## 0.5.1 - 2018-07-01
|
21
28
|
|
22
29
|
#### Changes
|
@@ -25,15 +32,15 @@
|
|
25
32
|
|
26
33
|
## 0.5.0 - 2018-06-26
|
27
34
|
|
28
|
-
#### Fixes
|
29
|
-
|
30
|
-
- don't treat string delimiters as patterns
|
31
|
-
|
32
35
|
#### Features
|
33
36
|
|
34
37
|
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
35
38
|
- add a `select` alias for `at`
|
36
39
|
|
40
|
+
#### Fixes
|
41
|
+
|
42
|
+
- don't treat string delimiters as patterns
|
43
|
+
|
37
44
|
## 0.4.0 - 2018-06-24
|
38
45
|
|
39
46
|
#### Breaking Changes
|
@@ -65,7 +72,7 @@
|
|
65
72
|
#### Breaking Changes
|
66
73
|
|
67
74
|
- the block now takes a single `split` object with an `index` accessor, rather
|
68
|
-
than
|
75
|
+
than separate `index` and `split` arguments
|
69
76
|
|
70
77
|
#### Features
|
71
78
|
|
data/README.md
CHANGED
@@ -44,17 +44,14 @@ ss = StringSplitter.new
|
|
44
44
|
|
45
45
|
```ruby
|
46
46
|
ss.split("foo bar baz")
|
47
|
-
ss.split("
|
47
|
+
ss.split("foo bar baz", " ")
|
48
|
+
ss.split("foo bar baz", /\s+/)
|
48
49
|
# => ["foo", "bar", "baz"]
|
49
|
-
```
|
50
50
|
|
51
|
-
```ruby
|
52
51
|
ss.split("foo", "")
|
53
52
|
ss.split("foo", //)
|
54
53
|
# => ["f", "o", "o"]
|
55
|
-
```
|
56
54
|
|
57
|
-
```ruby
|
58
55
|
ss.split("", "...")
|
59
56
|
ss.split("", /.../)
|
60
57
|
# => []
|
@@ -99,19 +96,13 @@ ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
|
99
96
|
|
100
97
|
**Split with negative, descending, and infinite ranges**
|
101
98
|
|
102
|
-
```ruby
|
103
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [4...])
|
105
|
-
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
106
|
-
```
|
107
|
-
|
108
99
|
```ruby
|
109
100
|
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
110
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [..-3])
|
111
101
|
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
112
|
-
```
|
113
102
|
|
114
|
-
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
105
|
+
|
115
106
|
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
116
107
|
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
117
108
|
```
|
@@ -182,12 +173,15 @@ end
|
|
182
173
|
# => ["foo", "bar:baz", "quux"]
|
183
174
|
```
|
184
175
|
|
185
|
-
As a shortcut, the common case of splitting
|
186
|
-
positions is supported by
|
176
|
+
As a shortcut, the common case of splitting (or not splitting) at one or more
|
177
|
+
positions is supported by dedicated options:
|
187
178
|
|
188
179
|
```ruby
|
189
|
-
ss.split("foo:bar:baz:quux", ":",
|
180
|
+
ss.split("foo:bar:baz:quux", ":", select: [1, -1])
|
190
181
|
# => ["foo", "bar:baz", "quux"]
|
182
|
+
|
183
|
+
ss.split("foo:bar:baz:quux", ":", reject: [1, -1])
|
184
|
+
# => ["foo:bar", "baz:quux"]
|
191
185
|
```
|
192
186
|
|
193
187
|
# WHY?
|
@@ -263,27 +257,36 @@ ss.split(line, at: [1..5, 8])
|
|
263
257
|
|
264
258
|
## Differences from String#split
|
265
259
|
|
266
|
-
|
267
|
-
|
260
|
+
Unlike `String#split`, StringSplitter doesn't trim the string before splitting
|
261
|
+
(with `String#strip`) if the delimiter is omitted or a single space, e.g.:
|
268
262
|
|
269
263
|
```ruby
|
270
|
-
" foo bar baz ".split
|
271
|
-
|
264
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
265
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
266
|
+
|
267
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
268
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
272
269
|
```
|
273
270
|
|
274
|
-
|
275
|
-
is supplied, e.g.:
|
271
|
+
`String#split` omits the `nil` values of unmatched optional captures:
|
276
272
|
|
277
273
|
```ruby
|
278
|
-
"
|
279
|
-
|
274
|
+
"foo:bar:baz".scan(/(:)|(-)/) # => [[":", nil], [":", nil]]
|
275
|
+
"foo:bar:baz".split(/(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
280
276
|
```
|
281
277
|
|
282
|
-
|
278
|
+
StringSplitter preserves them by default (if `include_captures` is true, as it
|
279
|
+
is by default), though they can be omitted from spread captures by passing
|
280
|
+
`:compact` as the value of the `spread_captures` option:
|
283
281
|
|
284
282
|
```ruby
|
285
|
-
|
286
|
-
|
283
|
+
s1 = StringSplitter.new(spread_captures: true)
|
284
|
+
s2 = StringSplitter.new(spread_captures: false)
|
285
|
+
s3 = StringSplitter.new(spread_captures: :compact)
|
286
|
+
|
287
|
+
s1.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", nil, "bar", ":", nil, "baz"]
|
288
|
+
s2.split("foo:bar:baz", /(:)|(-)/) # => ["foo", [":", nil], "bar", [":", nil], "baz"]
|
289
|
+
s3.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
287
290
|
```
|
288
291
|
|
289
292
|
# COMPATIBILITY
|
@@ -294,7 +297,7 @@ currently, Ruby 2.5 and above.
|
|
294
297
|
|
295
298
|
# VERSION
|
296
299
|
|
297
|
-
0.6.0
|
300
|
+
0.7.0
|
298
301
|
|
299
302
|
# SEE ALSO
|
300
303
|
|
data/lib/string_splitter.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'set'
|
4
4
|
require 'values'
|
5
|
+
|
5
6
|
require_relative 'string_splitter/version'
|
6
7
|
|
7
8
|
# This class extends the functionality of +String#split+ by:
|
@@ -16,9 +17,9 @@ require_relative 'string_splitter/version'
|
|
16
17
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
17
18
|
# guns, e.g. regex matching or parsing.
|
18
19
|
#
|
19
|
-
# Implementation-wise, we
|
20
|
-
#
|
21
|
-
# following fields:
|
20
|
+
# Implementation-wise, we split the string with a scanner which works in a similar
|
21
|
+
# way to +String#split+ and parse the resulting tokens into an array of Split objects
|
22
|
+
# with the following fields:
|
22
23
|
#
|
23
24
|
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
24
25
|
# - count: the number of splits
|
@@ -40,6 +41,7 @@ class StringSplitter
|
|
40
41
|
|
41
42
|
ACCEPT_ALL = ->(_split) { true }
|
42
43
|
DEFAULT_DELIMITER = /\s+/.freeze
|
44
|
+
REMOVE = [].freeze
|
43
45
|
|
44
46
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
45
47
|
def position
|
@@ -184,7 +186,7 @@ class StringSplitter
|
|
184
186
|
|
185
187
|
# initialisation common to +split+ and +rsplit+
|
186
188
|
#
|
187
|
-
# takes a hash of options passed to +split+ or +rsplit+ and returns a
|
189
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns a tuple with
|
188
190
|
# the following fields:
|
189
191
|
#
|
190
192
|
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
@@ -200,10 +202,6 @@ class StringSplitter
|
|
200
202
|
# accepted (true) or rejected (false)
|
201
203
|
#
|
202
204
|
def init(string:, delimiter:, select:, reject:, block:)
|
203
|
-
if delimiter.equal?(DEFAULT_DELIMITER)
|
204
|
-
string = string.strip
|
205
|
-
end
|
206
|
-
|
207
205
|
if reject
|
208
206
|
positions = reject
|
209
207
|
action = Action::REJECT
|
@@ -223,18 +221,25 @@ class StringSplitter
|
|
223
221
|
[[], splits, splits.length, block]
|
224
222
|
end
|
225
223
|
|
226
|
-
def render(
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
224
|
+
def render(values)
|
225
|
+
values.flat_map do |value|
|
226
|
+
if value.is_a?(String)
|
227
|
+
value.empty? && @remove_empty_fields ? REMOVE : [value]
|
228
|
+
elsif @include_captures
|
229
|
+
if @spread_captures
|
230
|
+
@spread_captures == :compact ? value.compact : value
|
231
|
+
elsif value.empty?
|
232
|
+
# we expose non-captures (string delimiters or regexps with no
|
233
|
+
# captures) as empty arrays inside the block, so the type is
|
234
|
+
# consistent, but it doesn't make sense to keep them in the
|
235
|
+
# result
|
236
|
+
REMOVE
|
237
|
+
else
|
238
|
+
[value]
|
239
|
+
end
|
240
|
+
else
|
241
|
+
REMOVE
|
242
|
+
end
|
238
243
|
end
|
239
244
|
end
|
240
245
|
|
@@ -252,14 +257,14 @@ class StringSplitter
|
|
252
257
|
# { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
|
253
258
|
# ]
|
254
259
|
#
|
255
|
-
def parse(string,
|
260
|
+
def parse(string, delimiter)
|
256
261
|
result = []
|
257
262
|
start = 0
|
258
263
|
|
259
264
|
# we don't use the argument passed to the +scan+ block here because it's a
|
260
265
|
# string (the separator) if there are no captures, rather than an empty
|
261
266
|
# array. we use match.captures instead to get the array
|
262
|
-
string.scan(
|
267
|
+
string.scan(delimiter) do
|
263
268
|
match = Regexp.last_match
|
264
269
|
index, after = match.offset(0)
|
265
270
|
separator = match[0]
|
@@ -281,8 +286,8 @@ class StringSplitter
|
|
281
286
|
separator: separator,
|
282
287
|
}
|
283
288
|
|
284
|
-
# move the start index (the start of the lhs) to the index after the
|
285
|
-
# character of the separator
|
289
|
+
# move the start index (the start of the next lhs) to the index after the
|
290
|
+
# last character of the separator
|
286
291
|
start = after
|
287
292
|
end
|
288
293
|
|
@@ -292,8 +297,8 @@ class StringSplitter
|
|
292
297
|
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
293
298
|
# on the action) the supplied positions
|
294
299
|
#
|
295
|
-
# positions are preprocessed to support
|
296
|
-
#
|
300
|
+
# positions are preprocessed to support additional features: negative
|
301
|
+
# ranges, infinite ranges, and descending ranges, e.g.:
|
297
302
|
#
|
298
303
|
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
299
304
|
#
|
@@ -310,7 +315,7 @@ class StringSplitter
|
|
310
315
|
#
|
311
316
|
# ss.split("foo:bar:baz:quux", ":", at: 6..8)
|
312
317
|
#
|
313
|
-
def compile(positions, action,
|
318
|
+
def compile(positions, action, count)
|
314
319
|
# XXX note: we don't use modulo, because we don't want
|
315
320
|
# out-of-bounds indices to silently work, e.g. we don't want:
|
316
321
|
#
|
@@ -318,7 +323,7 @@ class StringSplitter
|
|
318
323
|
#
|
319
324
|
# to mysteriously match when the index/position is 0/1
|
320
325
|
#
|
321
|
-
resolve = ->(int) { int.negative? ?
|
326
|
+
resolve = ->(int) { int.negative? ? count + 1 + int : int }
|
322
327
|
|
323
328
|
# don't use Array(...) to wrap these as we don't want to convert ranges
|
324
329
|
positions = positions.is_a?(Array) ? positions : [positions]
|
@@ -334,7 +339,7 @@ class StringSplitter
|
|
334
339
|
if rbegin.nil?
|
335
340
|
Range.new(1, resolve[rend], rexc)
|
336
341
|
elsif rend.nil?
|
337
|
-
Range.new(resolve[rbegin],
|
342
|
+
Range.new(resolve[rbegin], count, rexc)
|
338
343
|
elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
|
339
344
|
from = resolve[rbegin]
|
340
345
|
to = resolve[rend]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-08-20 00:00:00.000000000 Z
|
11
|
+
date: 2020-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|