string_splitter 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +47 -10
- data/README.md +139 -49
- data/lib/string_splitter.rb +233 -181
- data/lib/string_splitter/version.rb +1 -1
- metadata +16 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d97ccb956fe51694359cdb0d3a997d6574de088bac6ed5a8e572f92bb5ed54a
|
4
|
+
data.tar.gz: 845cefeb5efd5d01baa45759cb05ff7ae5e9a457c1f148b340bb24c038bd259e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a935a6e0f3434801dcae6a32575779e1d2eb706f8f208087a208e7fdba39ac5b49928f8b7617aec60493a8db5988a013028650f8b2ced01fadb620bfd4c77e5
|
7
|
+
data.tar.gz: d76c18a283c1e113c8bffb73b813eb6074481faa7ea339811dc9a7424a5e24fdc3efbe9afa941459e566cde8271c3cd19a97e3a37a8cf90d36a65a7bf8fd6dcf
|
data/CHANGELOG.md
CHANGED
@@ -1,37 +1,74 @@
|
|
1
|
+
## 0.6.0 - 2020-08-20
|
2
|
+
|
3
|
+
#### Breaking Changes
|
4
|
+
|
5
|
+
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
6
|
+
unlike Ruby's `String#split` (but like Crystal's), the former no longer
|
7
|
+
strips the string before splitting
|
8
|
+
- rename the `remove_empty` option `remove_empty_fields`
|
9
|
+
- rename the `exclude` option `except` (alias for `reject`)
|
10
|
+
|
11
|
+
#### Fixes
|
12
|
+
|
13
|
+
- correctly handle backreferences in delimiter patterns
|
14
|
+
|
15
|
+
#### Features
|
16
|
+
|
17
|
+
- add support for descending, negative, and infinite ranges,
|
18
|
+
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
19
|
+
|
1
20
|
## 0.5.1 - 2018-07-01
|
2
21
|
|
22
|
+
#### Changes
|
23
|
+
|
3
24
|
- set StringSplitter::VERSION when `string_splitter.rb` is loaded
|
4
|
-
- doc tweaks
|
5
25
|
|
6
26
|
## 0.5.0 - 2018-06-26
|
7
27
|
|
28
|
+
#### Fixes
|
29
|
+
|
8
30
|
- don't treat string delimiters as patterns
|
31
|
+
|
32
|
+
#### Features
|
33
|
+
|
9
34
|
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
10
35
|
- add a `select` alias for `at`
|
11
36
|
|
12
37
|
## 0.4.0 - 2018-06-24
|
13
38
|
|
14
|
-
|
39
|
+
#### Breaking Changes
|
40
|
+
|
41
|
+
- remove the `offset` alias for `split.index`
|
15
42
|
|
16
43
|
## 0.3.1 - 2018-06-24
|
17
44
|
|
18
|
-
|
45
|
+
#### Fixes
|
46
|
+
|
47
|
+
- remove trailing empty field when the separator is empty
|
48
|
+
([#1](https://github.com/chocolateboy/string_splitter/issues/1))
|
19
49
|
|
20
50
|
## 0.3.0 - 2018-06-23
|
21
51
|
|
22
|
-
|
23
|
-
|
24
|
-
|
52
|
+
#### Breaking Changes
|
53
|
+
|
54
|
+
- rename the `default_separator` option `default_delimiter`
|
25
55
|
|
26
56
|
## 0.2.0 - 2018-06-22
|
27
57
|
|
28
|
-
|
29
|
-
|
58
|
+
#### Breaking Changes
|
59
|
+
|
60
|
+
- make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
|
61
|
+
1-based accessor
|
30
62
|
|
31
63
|
## 0.1.0 - 2018-06-22
|
32
64
|
|
33
|
-
|
34
|
-
|
65
|
+
#### Breaking Changes
|
66
|
+
|
67
|
+
- the block now takes a single `split` object with an `index` accessor, rather
|
68
|
+
than separate `index` and `split` arguments
|
69
|
+
|
70
|
+
#### Features
|
71
|
+
|
35
72
|
- add support for negative indices in the value supplied to the `at` option
|
36
73
|
- add a `count` field to the split object containing the total number of splits
|
37
74
|
|
data/README.md
CHANGED
@@ -3,14 +3,15 @@
|
|
3
3
|
[![Build Status](https://travis-ci.org/chocolateboy/string_splitter.svg)](https://travis-ci.org/chocolateboy/string_splitter)
|
4
4
|
[![Gem Version](https://img.shields.io/gem/v/string_splitter.svg)](https://rubygems.org/gems/string_splitter)
|
5
5
|
|
6
|
-
<!--
|
7
|
-
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
6
|
+
<!-- toc -->
|
8
7
|
|
9
8
|
- [NAME](#name)
|
10
9
|
- [INSTALLATION](#installation)
|
11
10
|
- [SYNOPSIS](#synopsis)
|
12
11
|
- [DESCRIPTION](#description)
|
13
12
|
- [WHY?](#why)
|
13
|
+
- [CAVEATS](#caveats)
|
14
|
+
- [Differences from String#split](#differences-from-string%23split)
|
14
15
|
- [COMPATIBILITY](#compatibility)
|
15
16
|
- [VERSION](#version)
|
16
17
|
- [SEE ALSO](#see-also)
|
@@ -19,7 +20,7 @@
|
|
19
20
|
- [AUTHOR](#author)
|
20
21
|
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
21
22
|
|
22
|
-
<!--
|
23
|
+
<!-- tocstop -->
|
23
24
|
|
24
25
|
# NAME
|
25
26
|
|
@@ -42,16 +43,28 @@ ss = StringSplitter.new
|
|
42
43
|
**Same as `String#split`**
|
43
44
|
|
44
45
|
```ruby
|
45
|
-
ss.split("foo bar baz
|
46
|
-
ss.split("foo bar baz
|
47
|
-
|
48
|
-
|
46
|
+
ss.split("foo bar baz")
|
47
|
+
ss.split(" foo bar baz ")
|
48
|
+
# => ["foo", "bar", "baz"]
|
49
|
+
```
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
ss.split("foo", "")
|
53
|
+
ss.split("foo", //)
|
54
|
+
# => ["f", "o", "o"]
|
55
|
+
```
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
ss.split("", "...")
|
59
|
+
ss.split("", /.../)
|
60
|
+
# => []
|
49
61
|
```
|
50
62
|
|
51
63
|
**Split at the first delimiter**
|
52
64
|
|
53
65
|
```ruby
|
54
66
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
67
|
+
ss.split("foo:bar:baz:quux", ":", select: 1)
|
55
68
|
# => ["foo", "bar:baz:quux"]
|
56
69
|
```
|
57
70
|
|
@@ -65,8 +78,16 @@ ss.split("foo:bar:baz:quux", ":", at: -1)
|
|
65
78
|
**Split at multiple delimiter positions**
|
66
79
|
|
67
80
|
```ruby
|
68
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -
|
69
|
-
# => ["1", "2", "3", "4:5:6:7", "
|
81
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
82
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
83
|
+
```
|
84
|
+
|
85
|
+
**Split at all but the first and last delimiters**
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
ss.split("1:2:3:4:5:6", ":", except: [1, -1])
|
89
|
+
ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
|
90
|
+
# => ["1:2", "3", "4", "5:6"]
|
70
91
|
```
|
71
92
|
|
72
93
|
**Split from the right**
|
@@ -75,44 +96,79 @@ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
|
75
96
|
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
76
97
|
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
77
98
|
```
|
99
|
+
|
100
|
+
**Split with negative, descending, and infinite ranges**
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [4...])
|
105
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
106
|
+
```
|
107
|
+
|
108
|
+
```ruby
|
109
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
110
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [..-3])
|
111
|
+
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
112
|
+
```
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
116
|
+
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
117
|
+
```
|
118
|
+
|
78
119
|
**Full control via a block**
|
79
120
|
|
80
121
|
```ruby
|
81
|
-
result = ss.split(
|
82
|
-
split.
|
122
|
+
result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
|
123
|
+
split.pos % 2 == 0
|
83
124
|
end
|
84
|
-
# => ["
|
125
|
+
# => ["1:2", "3:4", "5:6", "7:8"]
|
126
|
+
```
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
string = "banana".chars.sort.join # "aaabnn"
|
130
|
+
|
131
|
+
ss.split(string, "") do |split|
|
132
|
+
split.rhs != split.lhs
|
133
|
+
end
|
134
|
+
# => ["aaa", "b", "nn"]
|
85
135
|
```
|
86
136
|
|
87
137
|
# DESCRIPTION
|
88
138
|
|
89
|
-
Many languages have built-in `split` functions/methods for strings. They behave
|
90
|
-
(notwithstanding the occasional
|
91
|
-
and
|
139
|
+
Many languages have built-in `split` functions/methods for strings. They behave
|
140
|
+
similarly (notwithstanding the occasional
|
141
|
+
[surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
|
142
|
+
handle a few common cases e.g.:
|
92
143
|
|
93
144
|
* limiting the number of splits
|
94
145
|
* including the separator(s) in the results
|
95
146
|
* removing (some) empty fields
|
96
147
|
|
97
|
-
But, because the API is squeezed into two overloaded parameters (the delimiter
|
98
|
-
achieving the desired results can be tricky. For instance,
|
99
|
-
trailing fields (by default), it provides no
|
100
|
-
|
101
|
-
to
|
148
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter
|
149
|
+
and the limit), achieving the desired results can be tricky. For instance,
|
150
|
+
while `String#split` removes empty trailing fields (by default), it provides no
|
151
|
+
way to remove *all* empty fields. Likewise, the cramped API means there's no
|
152
|
+
way to e.g. combine a limit (positive integer) with the option to preserve
|
153
|
+
empty fields (negative integer), or use backreferences in a delimiter pattern
|
102
154
|
without including its captured subexpressions in the result.
|
103
155
|
|
104
|
-
If `split` was being written from scratch, without the baggage of its legacy
|
105
|
-
it's possible that some of these options would be made explicit rather
|
106
|
-
the parameters. And, indeed, this is possible in some
|
107
|
-
e.g. in Crystal:
|
156
|
+
If `split` was being written from scratch, without the baggage of its legacy
|
157
|
+
API, it's possible that some of these options would be made explicit rather
|
158
|
+
than overloading the parameters. And, indeed, this is possible in some
|
159
|
+
implementations, e.g. in Crystal:
|
108
160
|
|
109
161
|
```ruby
|
110
|
-
":foo:bar:baz:".split(":", remove_empty: false)
|
111
|
-
|
162
|
+
":foo:bar:baz:".split(":", remove_empty: false)
|
163
|
+
# => ["", "foo", "bar", "baz", ""]
|
164
|
+
|
165
|
+
":foo:bar:baz:".split(":", remove_empty: true)
|
166
|
+
# => ["foo", "bar", "baz"]
|
112
167
|
````
|
113
168
|
|
114
|
-
StringSplitter takes this one step further by moving the configuration out of
|
115
|
-
and delegating the strategy — i.e. which splits should be
|
169
|
+
StringSplitter takes this one step further by moving the configuration out of
|
170
|
+
the method altogether and delegating the strategy — i.e. which splits should be
|
171
|
+
accepted or rejected — to a block:
|
116
172
|
|
117
173
|
```ruby
|
118
174
|
ss = StringSplitter.new
|
@@ -120,22 +176,28 @@ ss = StringSplitter.new
|
|
120
176
|
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
121
177
|
# => ["foo", "bar:baz"]
|
122
178
|
|
123
|
-
ss.split("foo:bar:baz", ":")
|
124
|
-
|
179
|
+
ss.split("foo:bar:baz:quux", ":") do |split|
|
180
|
+
split.position == 1 || split.position == 3
|
181
|
+
end
|
182
|
+
# => ["foo", "bar:baz", "quux"]
|
125
183
|
```
|
126
184
|
|
127
|
-
As a shortcut, the common case of splitting on delimiters at one or more
|
185
|
+
As a shortcut, the common case of splitting on delimiters at one or more
|
186
|
+
positions is supported by an option:
|
128
187
|
|
129
188
|
```ruby
|
130
|
-
ss.split(
|
189
|
+
ss.split("foo:bar:baz:quux", ":", at: [1, -1])
|
190
|
+
# => ["foo", "bar:baz", "quux"]
|
131
191
|
```
|
132
192
|
|
133
193
|
# WHY?
|
134
194
|
|
135
|
-
I wanted to split semi-structured output into fields without having to resort
|
195
|
+
I wanted to split semi-structured output into fields without having to resort
|
196
|
+
to a regex or a full-blown parser.
|
136
197
|
|
137
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
138
|
-
that's tantalizingly close to being
|
198
|
+
As an example, the nominally unstructured output of many Unix commands is often
|
199
|
+
formatted in a way that's tantalizingly close to being
|
200
|
+
[machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
|
139
201
|
apart from a few pesky exceptions e.g.:
|
140
202
|
|
141
203
|
```bash
|
@@ -148,8 +210,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
148
210
|
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
149
211
|
```
|
150
212
|
|
151
|
-
These lines can *almost* be parsed into an array of fields by splitting them on
|
152
|
-
date (columns 6-8) i.e.:
|
213
|
+
These lines can *almost* be parsed into an array of fields by splitting them on
|
214
|
+
whitespace. The exception is the date (columns 6-8) i.e.:
|
153
215
|
|
154
216
|
```ruby
|
155
217
|
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
@@ -174,13 +236,14 @@ One way to work around this is to parse the whole line e.g.:
|
|
174
236
|
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
175
237
|
```
|
176
238
|
|
177
|
-
But that requires us to specify *everything*. What we really want is a version
|
178
|
-
which allows us to veto splitting for the 6th and 7th delimiters
|
179
|
-
|
180
|
-
|
239
|
+
But that requires us to specify *everything*. What we really want is a version
|
240
|
+
of `split` which allows us to veto splitting for the 6th and 7th delimiters
|
241
|
+
(and to stop after the 8th delimiter) i.e. control over which splits are
|
242
|
+
accepted, rather than being restricted to the single, baked-in strategy
|
243
|
+
provided by the `limit` parameter.
|
181
244
|
|
182
|
-
By providing a simple way to accept or reject each split, StringSplitter makes
|
183
|
-
this easy to handle, either via a block:
|
245
|
+
By providing a simple way to accept or reject each split, StringSplitter makes
|
246
|
+
cases like this easy to handle, either via a block:
|
184
247
|
|
185
248
|
```ruby
|
186
249
|
ss.split(line) do |split|
|
@@ -196,14 +259,42 @@ ss.split(line, at: [1..5, 8])
|
|
196
259
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
197
260
|
```
|
198
261
|
|
262
|
+
# CAVEATS
|
263
|
+
|
264
|
+
## Differences from String#split
|
265
|
+
|
266
|
+
StringSplitter shares `String#split`'s behavior of trimming the string before
|
267
|
+
splitting if the delimiter is omitted, e.g.:
|
268
|
+
|
269
|
+
```ruby
|
270
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
271
|
+
ss.split(" foo bar baz ") # => ["foo", "bar", "baz"]
|
272
|
+
```
|
273
|
+
|
274
|
+
However, unlike `String#split`, this doesn't also apply if a delimiter of `" "`
|
275
|
+
is supplied, e.g.:
|
276
|
+
|
277
|
+
```ruby
|
278
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
279
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
280
|
+
```
|
281
|
+
|
282
|
+
It also doesn't apply if a custom default-delimiter is defined:
|
283
|
+
|
284
|
+
```ruby
|
285
|
+
ss = StringSplitter.new(default_delimiter: /\s+/)
|
286
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
287
|
+
```
|
288
|
+
|
199
289
|
# COMPATIBILITY
|
200
290
|
|
201
|
-
StringSplitter is tested and supported on all versions of Ruby [supported by
|
202
|
-
|
291
|
+
StringSplitter is tested and supported on all versions of Ruby [supported by
|
292
|
+
the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
|
293
|
+
currently, Ruby 2.5 and above.
|
203
294
|
|
204
295
|
# VERSION
|
205
296
|
|
206
|
-
0.
|
297
|
+
0.6.0
|
207
298
|
|
208
299
|
# SEE ALSO
|
209
300
|
|
@@ -221,8 +312,7 @@ i.e., currently, Ruby 2.3 and above.
|
|
221
312
|
|
222
313
|
# COPYRIGHT AND LICENSE
|
223
314
|
|
224
|
-
Copyright © 2018 by chocolateboy.
|
315
|
+
Copyright © 2018-2020 by chocolateboy.
|
225
316
|
|
226
317
|
This is free software; you can redistribute it and/or modify it under the
|
227
|
-
terms of the [Artistic License 2.0](
|
228
|
-
|
318
|
+
terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
|
data/lib/string_splitter.rb
CHANGED
@@ -1,21 +1,45 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'set'
|
3
4
|
require 'values'
|
4
5
|
require_relative 'string_splitter/version'
|
5
6
|
|
6
7
|
# This class extends the functionality of +String#split+ by:
|
7
8
|
#
|
8
9
|
# - providing full control over which splits are accepted or rejected
|
10
|
+
#
|
9
11
|
# - adding support for splitting from right-to-left
|
12
|
+
#
|
10
13
|
# - encapsulating splitting options/preferences in the splitter rather
|
11
14
|
# than trying to cram them into overloaded method parameters
|
12
15
|
#
|
13
16
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
14
|
-
# guns e.g. regex matching or parsing.
|
17
|
+
# guns, e.g. regex matching or parsing.
|
18
|
+
#
|
19
|
+
# Implementation-wise, we effectively use the built-in +String#split+ method as a
|
20
|
+
# tokenizer, and parse the resulting tokens into an array of Split objects with the
|
21
|
+
# following fields:
|
22
|
+
#
|
23
|
+
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
24
|
+
# - count: the number of splits
|
25
|
+
# - index: the 0-based index of the split in the array
|
26
|
+
# - lhs: the string to the left of the separator (back to the previous split candidate)
|
27
|
+
# - position: the 1-based index of the split in the array (alias: pos)
|
28
|
+
# - rhs: the string to the right of the separator (up to the next split candidate)
|
29
|
+
# - rindex: the 0-based index of the split relative to the end of the array
|
30
|
+
# - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
|
31
|
+
# - separator: the string matched by the delimiter pattern/string
|
32
|
+
#
|
15
33
|
class StringSplitter
|
34
|
+
# terminology: the delimiter is what we provide and the separators are what we get
|
35
|
+
# back (if we capture them). e.g. for:
|
36
|
+
#
|
37
|
+
# ss.split("foo:bar::baz", /(\W+)/)
|
38
|
+
#
|
39
|
+
# the delimiter is /(\W+)/ and the separators are ":" and "::"
|
40
|
+
|
16
41
|
ACCEPT_ALL = ->(_split) { true }
|
17
|
-
DEFAULT_DELIMITER = /\s
|
18
|
-
NO_SPLITS = []
|
42
|
+
DEFAULT_DELIMITER = /\s+/.freeze
|
19
43
|
|
20
44
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
21
45
|
def position
|
@@ -23,32 +47,78 @@ class StringSplitter
|
|
23
47
|
end
|
24
48
|
|
25
49
|
alias_method :pos, :position
|
50
|
+
|
51
|
+
# 0-based index relative to the end of the array, e.g. for 5 items:
|
52
|
+
#
|
53
|
+
# index | rindex
|
54
|
+
# ------|-------
|
55
|
+
# 0 | 4
|
56
|
+
# 1 | 3
|
57
|
+
# 2 | 2
|
58
|
+
# 3 | 1
|
59
|
+
# 4 | 0
|
60
|
+
def rindex
|
61
|
+
count - position
|
62
|
+
end
|
63
|
+
|
64
|
+
# 1-based position relative to the end of the array, e.g. for 5 items:
|
65
|
+
#
|
66
|
+
# position | rposition
|
67
|
+
# ----------|----------
|
68
|
+
# 1 | 5
|
69
|
+
# 2 | 4
|
70
|
+
# 3 | 3
|
71
|
+
# 4 | 2
|
72
|
+
# 5 | 1
|
73
|
+
def rposition
|
74
|
+
count + 1 - position
|
75
|
+
end
|
76
|
+
|
77
|
+
alias_method :rpos, :rposition
|
78
|
+
end
|
79
|
+
|
80
|
+
# simulate an enum. the value is returned by the case statement
|
81
|
+
# in the generated block if the positions match
|
82
|
+
module Action
|
83
|
+
SELECT = true
|
84
|
+
REJECT = false
|
26
85
|
end
|
27
86
|
|
87
|
+
private_constant :Action
|
88
|
+
|
28
89
|
def initialize(
|
29
90
|
default_delimiter: DEFAULT_DELIMITER,
|
30
91
|
include_captures: true,
|
31
|
-
remove_empty: false,
|
92
|
+
remove_empty: false, # TODO remove this
|
93
|
+
remove_empty_fields: remove_empty,
|
32
94
|
spread_captures: true
|
33
95
|
)
|
34
96
|
@default_delimiter = default_delimiter
|
35
97
|
@include_captures = include_captures
|
36
|
-
@
|
98
|
+
@remove_empty_fields = remove_empty_fields
|
37
99
|
@spread_captures = spread_captures
|
38
100
|
end
|
39
101
|
|
40
|
-
attr_reader
|
102
|
+
attr_reader(
|
103
|
+
:default_delimiter,
|
104
|
+
:include_captures,
|
105
|
+
:remove_empty_fields,
|
106
|
+
:spread_captures
|
107
|
+
)
|
108
|
+
|
109
|
+
# TODO remove this
|
110
|
+
alias remove_empty remove_empty_fields
|
41
111
|
|
42
112
|
def split(
|
43
113
|
string,
|
44
114
|
delimiter = @default_delimiter,
|
45
|
-
at: nil,
|
115
|
+
at: nil, # alias for select
|
116
|
+
except: nil, # alias for reject
|
46
117
|
select: at,
|
47
|
-
|
48
|
-
reject: exclude,
|
118
|
+
reject: except,
|
49
119
|
&block
|
50
120
|
)
|
51
|
-
result, splits,
|
121
|
+
result, splits, count, accept = init(
|
52
122
|
string: string,
|
53
123
|
delimiter: delimiter,
|
54
124
|
select: select,
|
@@ -56,29 +126,21 @@ class StringSplitter
|
|
56
126
|
block: block
|
57
127
|
)
|
58
128
|
|
59
|
-
|
129
|
+
return result unless splits
|
60
130
|
|
61
|
-
splits.each_with_index do |
|
62
|
-
split = Split.with(
|
131
|
+
splits.each_with_index do |hash, index|
|
132
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
63
133
|
result << split.lhs if result.empty?
|
64
134
|
|
65
|
-
if
|
66
|
-
|
67
|
-
if @spread_captures
|
68
|
-
result += split.captures
|
69
|
-
else
|
70
|
-
result << split.captures
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
result << split.rhs
|
135
|
+
if accept.call(split)
|
136
|
+
result << split.captures << split.rhs
|
75
137
|
else
|
76
|
-
#
|
138
|
+
# append the rhs
|
77
139
|
result[-1] = result[-1] + split.separator + split.rhs
|
78
140
|
end
|
79
141
|
end
|
80
142
|
|
81
|
-
result
|
143
|
+
render(result)
|
82
144
|
end
|
83
145
|
|
84
146
|
alias lsplit split
|
@@ -86,13 +148,13 @@ class StringSplitter
|
|
86
148
|
def rsplit(
|
87
149
|
string,
|
88
150
|
delimiter = @default_delimiter,
|
89
|
-
at: nil,
|
151
|
+
at: nil, # alias for select
|
152
|
+
except: nil, # alias for reject
|
90
153
|
select: at,
|
91
|
-
|
92
|
-
reject: exclude,
|
154
|
+
reject: except,
|
93
155
|
&block
|
94
156
|
)
|
95
|
-
result, splits,
|
157
|
+
result, splits, count, accept = init(
|
96
158
|
string: string,
|
97
159
|
delimiter: delimiter,
|
98
160
|
select: select,
|
@@ -100,203 +162,193 @@ class StringSplitter
|
|
100
162
|
block: block
|
101
163
|
)
|
102
164
|
|
103
|
-
|
165
|
+
return result unless splits
|
104
166
|
|
105
|
-
splits.
|
106
|
-
split = Split.with(
|
167
|
+
splits.reverse_each.with_index do |hash, index|
|
168
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
107
169
|
result.unshift(split.rhs) if result.empty?
|
108
170
|
|
109
|
-
if
|
110
|
-
|
111
|
-
|
112
|
-
result = split.captures + result
|
113
|
-
else
|
114
|
-
result.unshift(split.captures)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
result.unshift(split.lhs)
|
171
|
+
if accept.call(split)
|
172
|
+
# [lhs + captures] + result
|
173
|
+
result.unshift(split.lhs, split.captures)
|
119
174
|
else
|
120
175
|
# prepend the lhs
|
121
176
|
result[0] = split.lhs + split.separator + result[0]
|
122
177
|
end
|
123
178
|
end
|
124
179
|
|
125
|
-
result
|
180
|
+
render(result)
|
126
181
|
end
|
127
182
|
|
128
183
|
private
|
129
184
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
end
|
185
|
+
# initialisation common to +split+ and +rsplit+
|
186
|
+
#
|
187
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns an array with
|
188
|
+
# the following fields:
|
189
|
+
#
|
190
|
+
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
191
|
+
# if the splits array is empty, the caller returns this array immediately
|
192
|
+
# without any further processing
|
193
|
+
#
|
194
|
+
# - splits: an array of hashes containing the lhs, rhs, separator and captured
|
195
|
+
# separator substrings for each split
|
196
|
+
#
|
197
|
+
# - count: the number of splits
|
198
|
+
#
|
199
|
+
# - accept: a proc whose return value determines whether each split should be
|
200
|
+
# accepted (true) or rejected (false)
|
201
|
+
#
|
202
|
+
def init(string:, delimiter:, select:, reject:, block:)
|
203
|
+
if delimiter.equal?(DEFAULT_DELIMITER)
|
204
|
+
string = string.strip
|
205
|
+
end
|
152
206
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
207
|
+
if reject
|
208
|
+
positions = reject
|
209
|
+
action = Action::REJECT
|
210
|
+
elsif select
|
211
|
+
positions = select
|
212
|
+
action = Action::SELECT
|
159
213
|
end
|
160
214
|
|
161
|
-
|
162
|
-
end
|
215
|
+
splits = parse(string, delimiter)
|
163
216
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
#
|
168
|
-
# triple, where `result` is the return value of the method, `splits` is an array
|
169
|
-
# of hashes containing the lhs/rhs, separator and captures of each split, and
|
170
|
-
# `block` is a proc which specifies whether each split should be accepted or
|
171
|
-
# rejected
|
172
|
-
def split_init(string:, delimiter:, select:, reject:, block:)
|
173
|
-
unless (match = string.match(delimiter))
|
174
|
-
result = (@remove_empty && string.empty?) ? [] : [string]
|
175
|
-
return [result, NO_SPLITS, block]
|
217
|
+
if splits.empty?
|
218
|
+
result = string.empty? ? [] : [string]
|
219
|
+
return [result]
|
176
220
|
end
|
177
221
|
|
178
|
-
|
179
|
-
|
222
|
+
block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
|
223
|
+
[[], splits, splits.length, block]
|
224
|
+
end
|
180
225
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
elsif !select.empty?
|
185
|
-
positions = select
|
186
|
-
action = :select
|
226
|
+
def render(result)
|
227
|
+
if @remove_empty_fields
|
228
|
+
result.reject! { |it| it.is_a?(String) && it.empty? }
|
187
229
|
end
|
188
230
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
parts = string.split(/(#{delimiter})/, -1)
|
193
|
-
remove_trailing_empty_field!(parts, ncaptures)
|
194
|
-
result, splits = splits_for(parts, ncaptures)
|
195
|
-
block ||= positions ? match_positions(positions, action, splits.length) : ACCEPT_ALL
|
231
|
+
unless @include_captures
|
232
|
+
return result.reject! { |it| it.is_a?(Array) }
|
233
|
+
end
|
196
234
|
|
197
|
-
|
235
|
+
result.flat_map do |value|
|
236
|
+
next [value] unless value.is_a?(Array) && @spread_captures
|
237
|
+
@spread_captures == :compact ? value.compact : value
|
238
|
+
end
|
198
239
|
end
|
199
240
|
|
200
|
-
#
|
201
|
-
#
|
202
|
-
#
|
203
|
-
# e.g. to split on:
|
241
|
+
# takes a string and a delimiter pattern (regex or string) and splits it along
|
242
|
+
# the delimiter, returning an array of objects (hashes) representing each split.
|
243
|
+
# e.g. for:
|
204
244
|
#
|
205
|
-
#
|
206
|
-
# - <bar-comment> ... </bar-comment>
|
245
|
+
# parse("foo:bar:baz:quux", ":")
|
207
246
|
#
|
208
|
-
#
|
247
|
+
# we return:
|
209
248
|
#
|
210
|
-
#
|
249
|
+
# [
|
250
|
+
# { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
|
251
|
+
# { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
|
252
|
+
# { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
|
253
|
+
# ]
|
211
254
|
#
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
#
|
216
|
-
# %r| ( <(\w+-comment)> [^<]* </\2-comment> ) |x
|
255
|
+
def parse(string, pattern)
|
256
|
+
result = []
|
257
|
+
start = 0
|
217
258
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
259
|
+
# we don't use the argument passed to the +scan+ block here because it's a
|
260
|
+
# string (the separator) if there are no captures, rather than an empty
|
261
|
+
# array. we use match.captures instead to get the array
|
262
|
+
string.scan(pattern) do
|
263
|
+
match = Regexp.last_match
|
264
|
+
index, after = match.offset(0)
|
265
|
+
separator = match[0]
|
266
|
+
|
267
|
+
# ignore empty separators at the beginning and/or end of the string
|
268
|
+
next if separator.empty? && (index.zero? || after == string.length)
|
269
|
+
|
270
|
+
lhs = string.slice(start, index - start)
|
271
|
+
result.last[:rhs] = lhs unless result.empty?
|
272
|
+
|
273
|
+
# this is correct for the last/only match, but gets updated to the next
|
274
|
+
# match's lhs for other matches
|
275
|
+
rhs = match.post_match
|
276
|
+
|
277
|
+
result << {
|
278
|
+
captures: match.captures,
|
279
|
+
lhs: lhs,
|
280
|
+
rhs: rhs,
|
281
|
+
separator: separator,
|
282
|
+
}
|
283
|
+
|
284
|
+
# move the start index (the start of the lhs) to the index after the last
|
285
|
+
# character of the separator
|
286
|
+
start = after
|
224
287
|
end
|
225
288
|
|
226
|
-
|
289
|
+
result
|
227
290
|
end
|
228
291
|
|
229
|
-
#
|
230
|
-
# on
|
292
|
+
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
293
|
+
# on the action) the supplied positions
|
231
294
|
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
# # => ["f", "o", "o", "b", "a", "r", ""]
|
295
|
+
# positions are preprocessed to support an additional feature: negative indices
|
296
|
+
# are translated to 1-based non-negative indices, e.g:
|
235
297
|
#
|
236
|
-
#
|
237
|
-
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
298
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
238
299
|
#
|
239
|
-
#
|
240
|
-
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
300
|
+
# translates to:
|
241
301
|
#
|
242
|
-
#
|
243
|
-
#
|
244
|
-
#
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
#
|
302
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
303
|
+
#
|
304
|
+
# and
|
305
|
+
#
|
306
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
|
307
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: [-3..])
|
308
|
+
#
|
309
|
+
# translate to:
|
310
|
+
#
|
311
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
|
312
|
+
#
|
313
|
+
def compile(positions, action, nsplits)
|
314
|
+
# XXX note: we don't use modulo, because we don't want
|
315
|
+
# out-of-bounds indices to silently work, e.g. we don't want:
|
255
316
|
#
|
256
|
-
#
|
317
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
257
318
|
#
|
258
|
-
#
|
259
|
-
# + ncaptures
|
260
|
-
# + 1 (separator)
|
319
|
+
# to mysteriously match when the index/position is 0/1
|
261
320
|
#
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
# to mysteriously match when the position is 2
|
289
|
-
|
290
|
-
nsplits + 1 + position
|
321
|
+
resolve = ->(int) { int.negative? ? nsplits + 1 + int : int }
|
322
|
+
|
323
|
+
# don't use Array(...) to wrap these as we don't want to convert ranges
|
324
|
+
positions = positions.is_a?(Array) ? positions : [positions]
|
325
|
+
|
326
|
+
positions = positions.map do |position|
|
327
|
+
if position.is_a?(Integer)
|
328
|
+
resolve[position]
|
329
|
+
elsif position.is_a?(Range)
|
330
|
+
rbegin = position.begin
|
331
|
+
rend = position.end
|
332
|
+
rexc = position.exclude_end?
|
333
|
+
|
334
|
+
if rbegin.nil?
|
335
|
+
Range.new(1, resolve[rend], rexc)
|
336
|
+
elsif rend.nil?
|
337
|
+
Range.new(resolve[rbegin], nsplits, rexc)
|
338
|
+
elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
|
339
|
+
from = resolve[rbegin]
|
340
|
+
to = resolve[rend]
|
341
|
+
to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
|
342
|
+
else
|
343
|
+
position
|
344
|
+
end
|
345
|
+
elsif position.is_a?(Set)
|
346
|
+
position.map { |it| resolve[it] }.to_set
|
291
347
|
else
|
292
348
|
position
|
293
349
|
end
|
294
350
|
end
|
295
351
|
|
296
|
-
|
297
|
-
|
298
|
-
lambda do |split|
|
299
|
-
case split.position when *positions then match else !match end
|
300
|
-
end
|
352
|
+
->(split) { case split.position when *positions then action else !action end }
|
301
353
|
end
|
302
354
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|
@@ -30,42 +30,42 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1
|
33
|
+
version: '2.1'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '5.
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '5.
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest-power_assert
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.3
|
61
|
+
version: '0.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.3
|
68
|
+
version: '0.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest-reporters
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,29 +86,15 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '13.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
-
|
98
|
-
name: rubocop
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: 0.54.0
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: 0.54.0
|
111
|
-
description:
|
96
|
+
version: '13.0'
|
97
|
+
description:
|
112
98
|
email: chocolate@cpan.org
|
113
99
|
executables: []
|
114
100
|
extensions: []
|
@@ -127,7 +113,7 @@ metadata:
|
|
127
113
|
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
128
114
|
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
129
115
|
source_code_uri: https://github.com/chocolateboy/string_splitter
|
130
|
-
post_install_message:
|
116
|
+
post_install_message:
|
131
117
|
rdoc_options: []
|
132
118
|
require_paths:
|
133
119
|
- lib
|
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
121
|
requirements:
|
136
122
|
- - ">="
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '2.3'
|
139
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
126
|
requirements:
|
141
127
|
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
129
|
version: '0'
|
144
130
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
131
|
+
rubygems_version: 3.1.4
|
132
|
+
signing_key:
|
148
133
|
specification_version: 4
|
149
134
|
summary: String#split on steroids
|
150
135
|
test_files: []
|