string_splitter 0.5.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +76 -11
- data/README.md +146 -53
- data/lib/string_splitter.rb +280 -188
- data/lib/string_splitter/split.rb +61 -0
- data/lib/string_splitter/version.rb +1 -1
- metadata +17 -45
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d922735ed5c3b8acdc9f0fa0d0c439f0293e1b72739dd1f6b9ef9018a332f6c9
|
|
4
|
+
data.tar.gz: b61f3b6e827675abd5fe1457a000735c4ae4a4a11dc858fc705b783820230fce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f226e28ffb81f405ac986c01bf0cdb4270512d0a595d89d7d77ad1b53ed25dd122dac084887de26e26293d214906709724f018b83353a9928209257f6c80bc9e
|
|
7
|
+
data.tar.gz: f56357c60d8d52ff577a1ee5c4d16bbd4082bf29853dc3593fad3eefd6670a345ed245534944f5972fca1909e7e16526351eab898e4832d7cb2054ec86be4851
|
data/CHANGELOG.md
CHANGED
|
@@ -1,37 +1,102 @@
|
|
|
1
|
+
## 0.7.3 - 2020-08-24
|
|
2
|
+
|
|
3
|
+
#### Changes
|
|
4
|
+
|
|
5
|
+
- avoid exposing an internal Split method inside blocks
|
|
6
|
+
|
|
7
|
+
## 0.7.2 - 2020-08-22
|
|
8
|
+
|
|
9
|
+
#### Fixes
|
|
10
|
+
|
|
11
|
+
- fix/test default delimiter + `remove_empty_fields`
|
|
12
|
+
|
|
13
|
+
## 0.7.1 - 2020-08-22
|
|
14
|
+
|
|
15
|
+
#### Changes
|
|
16
|
+
|
|
17
|
+
- performance improvements
|
|
18
|
+
- delegate to `String#split` where possible
|
|
19
|
+
- use a regular class for Split rather than values.rb
|
|
20
|
+
- create Split objects directly rather than allocating intermediate hashes
|
|
21
|
+
|
|
22
|
+
## 0.7.0 - 2020-08-21
|
|
23
|
+
|
|
24
|
+
#### Breaking Changes
|
|
25
|
+
|
|
26
|
+
- `String#split` incompatibility: we no longer trim the string (with
|
|
27
|
+
`String#strip`) before splitting if the delimiter is omitted
|
|
28
|
+
|
|
29
|
+
## 0.6.0 - 2020-08-20
|
|
30
|
+
|
|
31
|
+
#### Breaking Changes
|
|
32
|
+
|
|
33
|
+
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
|
34
|
+
unlike Ruby's `String#split`, the former no longer strips the string before
|
|
35
|
+
splitting
|
|
36
|
+
- rename the `remove_empty` option `remove_empty_fields`
|
|
37
|
+
- rename the `exclude` option `except` (alias for `reject`)
|
|
38
|
+
|
|
39
|
+
#### Features
|
|
40
|
+
|
|
41
|
+
- add support for descending, negative, and infinite ranges,
|
|
42
|
+
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
|
43
|
+
|
|
44
|
+
#### Fixes
|
|
45
|
+
|
|
46
|
+
- correctly handle backreferences in delimiter patterns
|
|
47
|
+
|
|
1
48
|
## 0.5.1 - 2018-07-01
|
|
2
49
|
|
|
50
|
+
#### Changes
|
|
51
|
+
|
|
3
52
|
- set StringSplitter::VERSION when `string_splitter.rb` is loaded
|
|
4
|
-
- doc tweaks
|
|
5
53
|
|
|
6
54
|
## 0.5.0 - 2018-06-26
|
|
7
55
|
|
|
8
|
-
|
|
56
|
+
#### Features
|
|
57
|
+
|
|
9
58
|
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
|
10
59
|
- add a `select` alias for `at`
|
|
11
60
|
|
|
61
|
+
#### Fixes
|
|
62
|
+
|
|
63
|
+
- don't treat string delimiters as patterns
|
|
64
|
+
|
|
12
65
|
## 0.4.0 - 2018-06-24
|
|
13
66
|
|
|
14
|
-
|
|
67
|
+
#### Breaking Changes
|
|
68
|
+
|
|
69
|
+
- remove the `offset` alias for `split.index`
|
|
15
70
|
|
|
16
71
|
## 0.3.1 - 2018-06-24
|
|
17
72
|
|
|
18
|
-
|
|
73
|
+
#### Fixes
|
|
74
|
+
|
|
75
|
+
- remove trailing empty field when the separator is empty
|
|
76
|
+
([#1](https://github.com/chocolateboy/string_splitter/issues/1))
|
|
19
77
|
|
|
20
78
|
## 0.3.0 - 2018-06-23
|
|
21
79
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
80
|
+
#### Breaking Changes
|
|
81
|
+
|
|
82
|
+
- rename the `default_separator` option `default_delimiter`
|
|
25
83
|
|
|
26
84
|
## 0.2.0 - 2018-06-22
|
|
27
85
|
|
|
28
|
-
|
|
29
|
-
|
|
86
|
+
#### Breaking Changes
|
|
87
|
+
|
|
88
|
+
- make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
|
|
89
|
+
1-based accessor
|
|
30
90
|
|
|
31
91
|
## 0.1.0 - 2018-06-22
|
|
32
92
|
|
|
33
|
-
|
|
34
|
-
|
|
93
|
+
#### Breaking Changes
|
|
94
|
+
|
|
95
|
+
- the block now takes a single `split` object with an `index` accessor, rather
|
|
96
|
+
than separate `index` and `split` arguments
|
|
97
|
+
|
|
98
|
+
#### Features
|
|
99
|
+
|
|
35
100
|
- add support for negative indices in the value supplied to the `at` option
|
|
36
101
|
- add a `count` field to the split object containing the total number of splits
|
|
37
102
|
|
data/README.md
CHANGED
|
@@ -3,14 +3,15 @@
|
|
|
3
3
|
[](https://travis-ci.org/chocolateboy/string_splitter)
|
|
4
4
|
[](https://rubygems.org/gems/string_splitter)
|
|
5
5
|
|
|
6
|
-
<!--
|
|
7
|
-
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
6
|
+
<!-- toc -->
|
|
8
7
|
|
|
9
8
|
- [NAME](#name)
|
|
10
9
|
- [INSTALLATION](#installation)
|
|
11
10
|
- [SYNOPSIS](#synopsis)
|
|
12
11
|
- [DESCRIPTION](#description)
|
|
13
12
|
- [WHY?](#why)
|
|
13
|
+
- [CAVEATS](#caveats)
|
|
14
|
+
- [Differences from String#split](#differences-from-stringsplit)
|
|
14
15
|
- [COMPATIBILITY](#compatibility)
|
|
15
16
|
- [VERSION](#version)
|
|
16
17
|
- [SEE ALSO](#see-also)
|
|
@@ -19,7 +20,7 @@
|
|
|
19
20
|
- [AUTHOR](#author)
|
|
20
21
|
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
|
21
22
|
|
|
22
|
-
<!--
|
|
23
|
+
<!-- tocstop -->
|
|
23
24
|
|
|
24
25
|
# NAME
|
|
25
26
|
|
|
@@ -42,16 +43,25 @@ ss = StringSplitter.new
|
|
|
42
43
|
**Same as `String#split`**
|
|
43
44
|
|
|
44
45
|
```ruby
|
|
45
|
-
ss.split("foo bar baz
|
|
46
|
-
ss.split("foo bar baz
|
|
47
|
-
ss.split("foo bar baz
|
|
48
|
-
# => ["foo", "bar", "baz"
|
|
46
|
+
ss.split("foo bar baz")
|
|
47
|
+
ss.split("foo bar baz", " ")
|
|
48
|
+
ss.split("foo bar baz", /\s+/)
|
|
49
|
+
# => ["foo", "bar", "baz"]
|
|
50
|
+
|
|
51
|
+
ss.split("foo", "")
|
|
52
|
+
ss.split("foo", //)
|
|
53
|
+
# => ["f", "o", "o"]
|
|
54
|
+
|
|
55
|
+
ss.split("", "...")
|
|
56
|
+
ss.split("", /.../)
|
|
57
|
+
# => []
|
|
49
58
|
```
|
|
50
59
|
|
|
51
60
|
**Split at the first delimiter**
|
|
52
61
|
|
|
53
62
|
```ruby
|
|
54
63
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
|
64
|
+
ss.split("foo:bar:baz:quux", ":", select: 1)
|
|
55
65
|
# => ["foo", "bar:baz:quux"]
|
|
56
66
|
```
|
|
57
67
|
|
|
@@ -65,54 +75,91 @@ ss.split("foo:bar:baz:quux", ":", at: -1)
|
|
|
65
75
|
**Split at multiple delimiter positions**
|
|
66
76
|
|
|
67
77
|
```ruby
|
|
68
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -
|
|
69
|
-
# => ["1", "2", "3", "4:5:6:7", "
|
|
78
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
|
79
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**Split at all but the first and last delimiters**
|
|
83
|
+
|
|
84
|
+
```ruby
|
|
85
|
+
ss.split("1:2:3:4:5:6", ":", except: [1, -1])
|
|
86
|
+
ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
|
|
87
|
+
# => ["1:2", "3", "4", "5:6"]
|
|
70
88
|
```
|
|
71
89
|
|
|
72
90
|
**Split from the right**
|
|
73
91
|
|
|
74
92
|
```ruby
|
|
75
|
-
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3,
|
|
76
|
-
# => ["1
|
|
93
|
+
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
|
94
|
+
# => ["1", "2:3:4:5:6", "7", "8", "9"]
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
**Split with negative, descending, and infinite ranges**
|
|
98
|
+
|
|
99
|
+
```ruby
|
|
100
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
|
101
|
+
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
|
102
|
+
|
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
|
104
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
|
105
|
+
|
|
106
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
|
107
|
+
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
|
77
108
|
```
|
|
109
|
+
|
|
78
110
|
**Full control via a block**
|
|
79
111
|
|
|
80
112
|
```ruby
|
|
81
|
-
result = ss.split(
|
|
82
|
-
split.
|
|
113
|
+
result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
|
|
114
|
+
split.pos % 2 == 0
|
|
83
115
|
end
|
|
84
|
-
# => ["
|
|
116
|
+
# => ["1:2", "3:4", "5:6", "7:8"]
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
```ruby
|
|
120
|
+
string = "banana".chars.sort.join # "aaabnn"
|
|
121
|
+
|
|
122
|
+
ss.split(string, "") do |split|
|
|
123
|
+
split.rhs != split.lhs
|
|
124
|
+
end
|
|
125
|
+
# => ["aaa", "b", "nn"]
|
|
85
126
|
```
|
|
86
127
|
|
|
87
128
|
# DESCRIPTION
|
|
88
129
|
|
|
89
|
-
Many languages have built-in `split` functions/methods for strings. They behave
|
|
90
|
-
(notwithstanding the occasional
|
|
91
|
-
and
|
|
130
|
+
Many languages have built-in `split` functions/methods for strings. They behave
|
|
131
|
+
similarly (notwithstanding the occasional
|
|
132
|
+
[surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
|
|
133
|
+
handle a few common cases, e.g.:
|
|
92
134
|
|
|
93
135
|
* limiting the number of splits
|
|
94
136
|
* including the separator(s) in the results
|
|
95
137
|
* removing (some) empty fields
|
|
96
138
|
|
|
97
|
-
But, because the API is squeezed into two overloaded parameters (the delimiter
|
|
98
|
-
achieving the desired results can be tricky. For instance,
|
|
99
|
-
trailing fields (by default), it provides no
|
|
100
|
-
|
|
101
|
-
to
|
|
139
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter
|
|
140
|
+
and the limit), achieving the desired results can be tricky. For instance,
|
|
141
|
+
while `String#split` removes empty trailing fields (by default), it provides no
|
|
142
|
+
way to remove *all* empty fields. Likewise, the cramped API means there's no
|
|
143
|
+
way to, e.g., combine a limit (positive integer) with the option to preserve
|
|
144
|
+
empty fields (negative integer), or use backreferences in a delimiter pattern
|
|
102
145
|
without including its captured subexpressions in the result.
|
|
103
146
|
|
|
104
|
-
If `split` was being written from scratch, without the baggage of its legacy
|
|
105
|
-
it's possible that some of these options would be made explicit rather
|
|
106
|
-
the parameters. And, indeed, this is possible in some
|
|
107
|
-
e.g. in Crystal:
|
|
147
|
+
If `split` were being written from scratch, without the baggage of its legacy
|
|
148
|
+
API, it's possible that some of these options would be made explicit rather
|
|
149
|
+
than overloading the parameters. And, indeed, this is possible in some
|
|
150
|
+
implementations, e.g. in Crystal:
|
|
108
151
|
|
|
109
152
|
```ruby
|
|
110
|
-
":foo:bar:baz:".split(":", remove_empty: false)
|
|
111
|
-
|
|
153
|
+
":foo:bar:baz:".split(":", remove_empty: false)
|
|
154
|
+
# => ["", "foo", "bar", "baz", ""]
|
|
155
|
+
|
|
156
|
+
":foo:bar:baz:".split(":", remove_empty: true)
|
|
157
|
+
# => ["foo", "bar", "baz"]
|
|
112
158
|
````
|
|
113
159
|
|
|
114
|
-
StringSplitter takes this one step further by moving the configuration out of
|
|
115
|
-
and delegating the strategy — i.e. which splits should be
|
|
160
|
+
StringSplitter takes this one step further by moving the configuration out of
|
|
161
|
+
the method altogether and delegating the strategy — i.e. which splits should be
|
|
162
|
+
accepted or rejected — to a block:
|
|
116
163
|
|
|
117
164
|
```ruby
|
|
118
165
|
ss = StringSplitter.new
|
|
@@ -120,23 +167,32 @@ ss = StringSplitter.new
|
|
|
120
167
|
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
|
121
168
|
# => ["foo", "bar:baz"]
|
|
122
169
|
|
|
123
|
-
ss.split("foo:bar:baz", ":")
|
|
124
|
-
|
|
170
|
+
ss.split("foo:bar:baz:quux", ":") do |split|
|
|
171
|
+
split.position == 1 || split.position == 3
|
|
172
|
+
end
|
|
173
|
+
# => ["foo", "bar:baz", "quux"]
|
|
125
174
|
```
|
|
126
175
|
|
|
127
|
-
As a shortcut, the common case of splitting
|
|
176
|
+
As a shortcut, the common case of splitting (or not splitting) at one or more
|
|
177
|
+
positions is supported by dedicated options:
|
|
128
178
|
|
|
129
179
|
```ruby
|
|
130
|
-
ss.split(
|
|
180
|
+
ss.split("foo:bar:baz:quux", ":", select: [1, -1])
|
|
181
|
+
# => ["foo", "bar:baz", "quux"]
|
|
182
|
+
|
|
183
|
+
ss.split("foo:bar:baz:quux", ":", reject: [1, -1])
|
|
184
|
+
# => ["foo:bar", "baz:quux"]
|
|
131
185
|
```
|
|
132
186
|
|
|
133
187
|
# WHY?
|
|
134
188
|
|
|
135
|
-
I wanted to split semi-structured output into fields without having to resort
|
|
189
|
+
I wanted to split semi-structured output into fields without having to resort
|
|
190
|
+
to a regex or a full-blown parser.
|
|
136
191
|
|
|
137
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
|
138
|
-
that's tantalizingly close to being
|
|
139
|
-
|
|
192
|
+
As an example, the nominally unstructured output of many Unix commands is often
|
|
193
|
+
formatted in a way that's tantalizingly close to being
|
|
194
|
+
[machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
|
|
195
|
+
apart from a few pesky exceptions, e.g.:
|
|
140
196
|
|
|
141
197
|
```bash
|
|
142
198
|
$ ls -l
|
|
@@ -148,8 +204,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
|
148
204
|
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
|
149
205
|
```
|
|
150
206
|
|
|
151
|
-
These lines can *almost* be parsed into an array of fields by splitting them on
|
|
152
|
-
date (columns 6-8) i.e.:
|
|
207
|
+
These lines can *almost* be parsed into an array of fields by splitting them on
|
|
208
|
+
whitespace. The exception is the date (columns 6-8), i.e.:
|
|
153
209
|
|
|
154
210
|
```ruby
|
|
155
211
|
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
|
@@ -168,19 +224,20 @@ instead of:
|
|
|
168
224
|
["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
|
169
225
|
```
|
|
170
226
|
|
|
171
|
-
One way to work around this is to parse the whole line e.g.:
|
|
227
|
+
One way to work around this is to parse the whole line, e.g.:
|
|
172
228
|
|
|
173
229
|
```ruby
|
|
174
230
|
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
|
175
231
|
```
|
|
176
232
|
|
|
177
|
-
But that requires us to specify *everything*. What we really want is a version
|
|
178
|
-
which allows us to veto splitting for the 6th and 7th delimiters
|
|
179
|
-
|
|
180
|
-
|
|
233
|
+
But that requires us to specify *everything*. What we really want is a version
|
|
234
|
+
of `split` which allows us to veto splitting for the 6th and 7th delimiters
|
|
235
|
+
(and to stop after the 8th delimiter), i.e. control over which splits are
|
|
236
|
+
accepted, rather than being restricted to the single, baked-in strategy
|
|
237
|
+
provided by the `limit` parameter.
|
|
181
238
|
|
|
182
|
-
By providing a simple way to accept or reject each split, StringSplitter makes
|
|
183
|
-
this easy to handle, either via a block:
|
|
239
|
+
By providing a simple way to accept or reject each split, StringSplitter makes
|
|
240
|
+
cases like this easy to handle, either via a block:
|
|
184
241
|
|
|
185
242
|
```ruby
|
|
186
243
|
ss.split(line) do |split|
|
|
@@ -196,14 +253,51 @@ ss.split(line, at: [1..5, 8])
|
|
|
196
253
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
|
197
254
|
```
|
|
198
255
|
|
|
256
|
+
# CAVEATS
|
|
257
|
+
|
|
258
|
+
## Differences from String#split
|
|
259
|
+
|
|
260
|
+
Unlike `String#split`, StringSplitter doesn't trim the string before splitting
|
|
261
|
+
if the delimiter is omitted or a single space, e.g.:
|
|
262
|
+
|
|
263
|
+
```ruby
|
|
264
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
|
265
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
|
266
|
+
|
|
267
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
|
268
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
`String#split` omits the `nil` values of unmatched optional captures:
|
|
272
|
+
|
|
273
|
+
```ruby
|
|
274
|
+
"foo:bar:baz".scan(/(:)|(-)/) # => [[":", nil], [":", nil]]
|
|
275
|
+
"foo:bar:baz".split(/(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
StringSplitter preserves them (if `include_captures` is true, as it
|
|
279
|
+
is by default), though they can be omitted from spread captures by passing
|
|
280
|
+
`:compact` as the value of the `spread_captures` option:
|
|
281
|
+
|
|
282
|
+
```ruby
|
|
283
|
+
s1 = StringSplitter.new(spread_captures: true)
|
|
284
|
+
s2 = StringSplitter.new(spread_captures: false)
|
|
285
|
+
s3 = StringSplitter.new(spread_captures: :compact)
|
|
286
|
+
|
|
287
|
+
s1.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", nil, "bar", ":", nil, "baz"]
|
|
288
|
+
s2.split("foo:bar:baz", /(:)|(-)/) # => ["foo", [":", nil], "bar", [":", nil], "baz"]
|
|
289
|
+
s3.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
|
290
|
+
```
|
|
291
|
+
|
|
199
292
|
# COMPATIBILITY
|
|
200
293
|
|
|
201
|
-
StringSplitter is tested and supported on all versions of Ruby [supported by
|
|
202
|
-
|
|
294
|
+
StringSplitter is tested and supported on all versions of Ruby [supported by
|
|
295
|
+
the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
|
|
296
|
+
currently, Ruby 2.5 and above.
|
|
203
297
|
|
|
204
298
|
# VERSION
|
|
205
299
|
|
|
206
|
-
0.
|
|
300
|
+
0.7.3
|
|
207
301
|
|
|
208
302
|
# SEE ALSO
|
|
209
303
|
|
|
@@ -221,8 +315,7 @@ i.e., currently, Ruby 2.3 and above.
|
|
|
221
315
|
|
|
222
316
|
# COPYRIGHT AND LICENSE
|
|
223
317
|
|
|
224
|
-
Copyright © 2018 by chocolateboy.
|
|
318
|
+
Copyright © 2018-2020 by chocolateboy.
|
|
225
319
|
|
|
226
320
|
This is free software; you can redistribute it and/or modify it under the
|
|
227
|
-
terms of the [Artistic License 2.0](
|
|
228
|
-
|
|
321
|
+
terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
|
data/lib/string_splitter.rb
CHANGED
|
@@ -1,54 +1,94 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require '
|
|
3
|
+
require 'set'
|
|
4
|
+
|
|
5
|
+
require_relative 'string_splitter/split'
|
|
4
6
|
require_relative 'string_splitter/version'
|
|
5
7
|
|
|
6
8
|
# This class extends the functionality of +String#split+ by:
|
|
7
9
|
#
|
|
8
10
|
# - providing full control over which splits are accepted or rejected
|
|
11
|
+
#
|
|
9
12
|
# - adding support for splitting from right-to-left
|
|
13
|
+
#
|
|
10
14
|
# - encapsulating splitting options/preferences in the splitter rather
|
|
11
15
|
# than trying to cram them into overloaded method parameters
|
|
12
16
|
#
|
|
13
17
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
|
14
|
-
# guns e.g. regex matching or parsing.
|
|
18
|
+
# guns, e.g. regex matching or parsing.
|
|
19
|
+
#
|
|
20
|
+
# Implementation-wise, we split the string either with String#split, or with a custom
|
|
21
|
+
# scanner if the delimiter may contain captures (since String#split doesn't handle
|
|
22
|
+
# them correctly), and parse the resulting tokens into an array of Split objects with
|
|
23
|
+
# the following attributes:
|
|
24
|
+
#
|
|
25
|
+
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
|
26
|
+
# - count: the number of splits
|
|
27
|
+
# - index: the 0-based index of the split in the array
|
|
28
|
+
# - lhs: the string to the left of the separator (back to the previous split candidate)
|
|
29
|
+
# - position: the 1-based index of the split in the array (alias: pos)
|
|
30
|
+
# - rhs: the string to the right of the separator (up to the next split candidate)
|
|
31
|
+
# - rindex: the 0-based index of the split relative to the end of the array
|
|
32
|
+
# - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
|
|
33
|
+
# - separator: the string matched by the delimiter pattern/string
|
|
34
|
+
#
|
|
15
35
|
class StringSplitter
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
36
|
+
# terminology: the delimiter is what we provide and the separators are what we get
|
|
37
|
+
# back (if we capture them). e.g. for:
|
|
38
|
+
#
|
|
39
|
+
# ss.split("foo:bar::baz", /(\W+)/)
|
|
40
|
+
#
|
|
41
|
+
# the delimiter is /(\W+)/ and the separators are ":" and "::"
|
|
19
42
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
index + 1
|
|
23
|
-
end
|
|
43
|
+
# pull in the StringSplitter::Split#update! method
|
|
44
|
+
using Split::Refinements
|
|
24
45
|
|
|
25
|
-
|
|
46
|
+
ACCEPT_ALL = ->(_split) { true }
|
|
47
|
+
DEFAULT_DELIMITER = /\s+/.freeze
|
|
48
|
+
REMOVE = [].freeze
|
|
49
|
+
|
|
50
|
+
# simulate an enum. the value is returned by the case statement
|
|
51
|
+
# in the generated block if the positions match
|
|
52
|
+
module Action
|
|
53
|
+
SELECT = true
|
|
54
|
+
REJECT = false
|
|
26
55
|
end
|
|
27
56
|
|
|
57
|
+
private_constant :Action
|
|
58
|
+
|
|
28
59
|
def initialize(
|
|
29
60
|
default_delimiter: DEFAULT_DELIMITER,
|
|
30
61
|
include_captures: true,
|
|
31
|
-
remove_empty: false,
|
|
62
|
+
remove_empty: false, # TODO remove this
|
|
63
|
+
remove_empty_fields: remove_empty,
|
|
32
64
|
spread_captures: true
|
|
33
65
|
)
|
|
34
66
|
@default_delimiter = default_delimiter
|
|
35
67
|
@include_captures = include_captures
|
|
36
|
-
@
|
|
68
|
+
@remove_empty_fields = remove_empty_fields
|
|
37
69
|
@spread_captures = spread_captures
|
|
38
70
|
end
|
|
39
71
|
|
|
40
|
-
attr_reader
|
|
72
|
+
attr_reader(
|
|
73
|
+
:default_delimiter,
|
|
74
|
+
:include_captures,
|
|
75
|
+
:remove_empty_fields,
|
|
76
|
+
:spread_captures
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
# TODO remove this
|
|
80
|
+
alias remove_empty remove_empty_fields
|
|
41
81
|
|
|
42
82
|
def split(
|
|
43
83
|
string,
|
|
44
84
|
delimiter = @default_delimiter,
|
|
45
|
-
at: nil,
|
|
85
|
+
at: nil, # alias for select
|
|
86
|
+
except: nil, # alias for reject
|
|
46
87
|
select: at,
|
|
47
|
-
|
|
48
|
-
reject: exclude,
|
|
88
|
+
reject: except,
|
|
49
89
|
&block
|
|
50
90
|
)
|
|
51
|
-
result, splits,
|
|
91
|
+
result, splits, count, accept = init(
|
|
52
92
|
string: string,
|
|
53
93
|
delimiter: delimiter,
|
|
54
94
|
select: select,
|
|
@@ -56,29 +96,22 @@ class StringSplitter
|
|
|
56
96
|
block: block
|
|
57
97
|
)
|
|
58
98
|
|
|
59
|
-
|
|
99
|
+
return result unless splits
|
|
100
|
+
|
|
101
|
+
result << splits.first.lhs
|
|
60
102
|
|
|
61
103
|
splits.each_with_index do |split, index|
|
|
62
|
-
split
|
|
63
|
-
result << split.lhs if result.empty?
|
|
64
|
-
|
|
65
|
-
if block.call(split)
|
|
66
|
-
if @include_captures
|
|
67
|
-
if @spread_captures
|
|
68
|
-
result += split.captures
|
|
69
|
-
else
|
|
70
|
-
result << split.captures
|
|
71
|
-
end
|
|
72
|
-
end
|
|
104
|
+
split.update!(count: count, index: index)
|
|
73
105
|
|
|
74
|
-
|
|
106
|
+
if accept.call(split)
|
|
107
|
+
result << split.captures << split.rhs
|
|
75
108
|
else
|
|
76
|
-
#
|
|
109
|
+
# append the rhs
|
|
77
110
|
result[-1] = result[-1] + split.separator + split.rhs
|
|
78
111
|
end
|
|
79
112
|
end
|
|
80
113
|
|
|
81
|
-
result
|
|
114
|
+
render(result)
|
|
82
115
|
end
|
|
83
116
|
|
|
84
117
|
alias lsplit split
|
|
@@ -86,13 +119,13 @@ class StringSplitter
|
|
|
86
119
|
def rsplit(
|
|
87
120
|
string,
|
|
88
121
|
delimiter = @default_delimiter,
|
|
89
|
-
at: nil,
|
|
122
|
+
at: nil, # alias for select
|
|
123
|
+
except: nil, # alias for reject
|
|
90
124
|
select: at,
|
|
91
|
-
|
|
92
|
-
reject: exclude,
|
|
125
|
+
reject: except,
|
|
93
126
|
&block
|
|
94
127
|
)
|
|
95
|
-
result, splits,
|
|
128
|
+
result, splits, count, accept = init(
|
|
96
129
|
string: string,
|
|
97
130
|
delimiter: delimiter,
|
|
98
131
|
select: select,
|
|
@@ -100,203 +133,262 @@ class StringSplitter
|
|
|
100
133
|
block: block
|
|
101
134
|
)
|
|
102
135
|
|
|
103
|
-
|
|
136
|
+
return result unless splits
|
|
104
137
|
|
|
105
|
-
splits.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
if block.call(split)
|
|
110
|
-
if @include_captures
|
|
111
|
-
if @spread_captures
|
|
112
|
-
result = split.captures + result
|
|
113
|
-
else
|
|
114
|
-
result.unshift(split.captures)
|
|
115
|
-
end
|
|
116
|
-
end
|
|
138
|
+
result.unshift(splits.last.rhs)
|
|
139
|
+
|
|
140
|
+
splits.reverse_each.with_index do |split, index|
|
|
141
|
+
split.update!(count: count, index: index)
|
|
117
142
|
|
|
118
|
-
|
|
143
|
+
if accept.call(split)
|
|
144
|
+
# [lhs + captures] + result
|
|
145
|
+
result.unshift(split.lhs, split.captures)
|
|
119
146
|
else
|
|
120
147
|
# prepend the lhs
|
|
121
148
|
result[0] = split.lhs + split.separator + result[0]
|
|
122
149
|
end
|
|
123
150
|
end
|
|
124
151
|
|
|
125
|
-
result
|
|
152
|
+
render(result)
|
|
126
153
|
end
|
|
127
154
|
|
|
128
155
|
private
|
|
129
156
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
157
|
+
# initialisation common to +split+ and +rsplit+
|
|
158
|
+
#
|
|
159
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns a tuple with
|
|
160
|
+
# the following fields:
|
|
161
|
+
#
|
|
162
|
+
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
|
163
|
+
# if no splits array is returned (nil), the caller returns this array immediately
|
|
164
|
+
# without any further processing
|
|
165
|
+
#
|
|
166
|
+
# - splits: an array of Split objects exposing the lhs, rhs, separator and
|
|
167
|
+
# captured separator substrings for each split
|
|
168
|
+
#
|
|
169
|
+
# - count: the number of splits
|
|
170
|
+
#
|
|
171
|
+
# - accept: a proc whose return value determines whether each split should be
|
|
172
|
+
# accepted (true) or rejected (false)
|
|
173
|
+
#
|
|
174
|
+
def init(string:, delimiter:, select:, reject:, block:)
|
|
175
|
+
return [[]] if string.empty?
|
|
176
|
+
|
|
177
|
+
unless block
|
|
178
|
+
if reject
|
|
179
|
+
positions = reject
|
|
180
|
+
action = Action::REJECT
|
|
181
|
+
elsif select
|
|
182
|
+
positions = select
|
|
183
|
+
action = Action::SELECT
|
|
184
|
+
else
|
|
185
|
+
block = ACCEPT_ALL
|
|
186
|
+
end
|
|
187
|
+
end
|
|
133
188
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
if lhs.empty? && rhs.empty?
|
|
142
|
-
# do nothing
|
|
143
|
-
elsif parts.empty? # last split
|
|
144
|
-
result << (!lhs.empty? ? lhs : rhs) if splits.empty?
|
|
145
|
-
elsif rhs.empty?
|
|
146
|
-
# replace the empty rhs with the non-empty lhs
|
|
147
|
-
parts[0] = lhs
|
|
148
|
-
end
|
|
189
|
+
# use String#split if we can
|
|
190
|
+
#
|
|
191
|
+
# NOTE +reject!+ is no faster than +reject+ on MRI and significantly slower
|
|
192
|
+
# on TruffleRuby
|
|
193
|
+
|
|
194
|
+
if delimiter.is_a?(String)
|
|
195
|
+
limit = -1
|
|
149
196
|
|
|
150
|
-
|
|
197
|
+
if delimiter == ' '
|
|
198
|
+
delimiter = / / # don't trim
|
|
199
|
+
elsif delimiter.empty?
|
|
200
|
+
limit = 0 # remove the trailing empty string
|
|
151
201
|
end
|
|
152
202
|
|
|
153
|
-
|
|
154
|
-
lhs: lhs,
|
|
155
|
-
rhs: rhs,
|
|
156
|
-
separator: separator,
|
|
157
|
-
captures: captures,
|
|
158
|
-
}
|
|
159
|
-
end
|
|
203
|
+
result = string.split(delimiter, limit)
|
|
160
204
|
|
|
161
|
-
|
|
162
|
-
end
|
|
205
|
+
return [result] if result.length == 1 # delimiter not found: no splits
|
|
163
206
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
# triple, where `result` is the return value of the method, `splits` is an array
|
|
169
|
-
# of hashes containing the lhs/rhs, separator and captures of each split, and
|
|
170
|
-
# `block` is a proc which specifies whether each split should be accepted or
|
|
171
|
-
# rejected
|
|
172
|
-
def split_init(string:, delimiter:, select:, reject:, block:)
|
|
173
|
-
unless (match = string.match(delimiter))
|
|
174
|
-
result = (@remove_empty && string.empty?) ? [] : [string]
|
|
175
|
-
return [result, NO_SPLITS, block]
|
|
176
|
-
end
|
|
207
|
+
if block == ACCEPT_ALL # return the (2 or more) fields
|
|
208
|
+
result = result.reject(&:empty?) if @remove_empty_fields
|
|
209
|
+
return [result]
|
|
210
|
+
end
|
|
177
211
|
|
|
178
|
-
|
|
179
|
-
reject = Array(reject)
|
|
212
|
+
splits = []
|
|
180
213
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
214
|
+
result.each_cons(2) do |lhs, rhs| # 2 or more fields
|
|
215
|
+
splits << Split.new(
|
|
216
|
+
captures: [],
|
|
217
|
+
lhs: lhs,
|
|
218
|
+
rhs: rhs,
|
|
219
|
+
separator: delimiter
|
|
220
|
+
)
|
|
221
|
+
end
|
|
222
|
+
elsif delimiter == DEFAULT_DELIMITER && block == ACCEPT_ALL
|
|
223
|
+
# non-empty separators so -1 is safe
|
|
224
|
+
|
|
225
|
+
# XXX String#split with block was introduced in Ruby 2.6:
|
|
226
|
+
#
|
|
227
|
+
# - https://rubyreferences.github.io/rubychanges/2.6.html#stringsplit-with-block
|
|
228
|
+
#
|
|
229
|
+
# rather than sniffing, we'll just use the compatible version for now
|
|
230
|
+
#
|
|
231
|
+
# if @remove_empty_fields
|
|
232
|
+
# result = []
|
|
233
|
+
#
|
|
234
|
+
# string.split(delimiter, -1) do |field|
|
|
235
|
+
# result << field unless field.empty?
|
|
236
|
+
# end
|
|
237
|
+
# else
|
|
238
|
+
# result = string.split(delimiter, -1)
|
|
239
|
+
# end
|
|
240
|
+
|
|
241
|
+
result = string.split(delimiter, -1)
|
|
242
|
+
result = result.reject(&:empty?) if @remove_empty_fields
|
|
243
|
+
return [result]
|
|
244
|
+
else
|
|
245
|
+
splits = parse(string, delimiter)
|
|
187
246
|
end
|
|
188
247
|
|
|
189
|
-
|
|
190
|
-
delimiter = Regexp.quote(delimiter) if delimiter.is_a?(String)
|
|
191
|
-
delimiter = increment_backrefs(delimiter, ncaptures)
|
|
192
|
-
parts = string.split(/(#{delimiter})/, -1)
|
|
193
|
-
remove_trailing_empty_field!(parts, ncaptures)
|
|
194
|
-
result, splits = splits_for(parts, ncaptures)
|
|
195
|
-
block ||= positions ? match_positions(positions, action, splits.length) : ACCEPT_ALL
|
|
248
|
+
count = splits.length
|
|
196
249
|
|
|
197
|
-
[
|
|
250
|
+
return [[string]] if count.zero?
|
|
251
|
+
|
|
252
|
+
block ||= compile(positions, action, count)
|
|
253
|
+
[[], splits, count, block]
|
|
198
254
|
end
|
|
199
255
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
256
|
+
def render(values)
|
|
257
|
+
values.flat_map do |value|
|
|
258
|
+
if value.is_a?(String)
|
|
259
|
+
value.empty? && @remove_empty_fields ? REMOVE : [value]
|
|
260
|
+
elsif @include_captures
|
|
261
|
+
if @spread_captures
|
|
262
|
+
# TODO make sure compact can return a Capture
|
|
263
|
+
@spread_captures == :compact ? value.compact : value
|
|
264
|
+
elsif value.empty?
|
|
265
|
+
# we expose non-captures (string delimiters or regexps with no
|
|
266
|
+
# captures) as empty arrays inside the block, so the type is
|
|
267
|
+
# consistent, but it doesn't make sense to keep them in the
|
|
268
|
+
# result
|
|
269
|
+
REMOVE
|
|
270
|
+
else
|
|
271
|
+
[value]
|
|
272
|
+
end
|
|
273
|
+
else
|
|
274
|
+
REMOVE
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
|
|
279
|
+
# takes a string and a delimiter pattern (regex or string) and splits it along
|
|
280
|
+
# the delimiter, returning an array of objects representing each split.
|
|
281
|
+
# e.g. for:
|
|
209
282
|
#
|
|
210
|
-
#
|
|
283
|
+
# parse("foo:bar:baz:quux", ":")
|
|
211
284
|
#
|
|
212
|
-
#
|
|
285
|
+
# we return:
|
|
213
286
|
#
|
|
214
|
-
#
|
|
287
|
+
# [
|
|
288
|
+
# Split.new(lhs: "foo", rhs: "bar", separator: ":", captures: []),
|
|
289
|
+
# Split.new(lhs: "bar", rhs: "baz", separator: ":", captures: []),
|
|
290
|
+
# Split.new(lhs: "baz", rhs: "quux", separator: ":", captures: []),
|
|
291
|
+
# ]
|
|
215
292
|
#
|
|
216
|
-
|
|
293
|
+
def parse(string, delimiter)
  found = []
  cursor = 0 # where the next lhs begins, i.e. just past the previous separator

  # the value yielded by +scan+ is ignored: without captures it's the matched
  # string rather than an (empty) array, so we read everything from the
  # MatchData instead
  string.scan(delimiter) do
    match = Regexp.last_match
    from = match.begin(0)
    upto = match.end(0)
    separator = match[0]

    # a zero-width separator at the very start or very end of the string
    # doesn't split anything, so skip it
    next if separator.empty? && (from.zero? || upto == string.length)

    lhs = string[cursor...from]

    # the previous split's rhs was provisionally the rest of the string; now we
    # know where it stops
    previous = found.last
    previous.rhs = lhs if previous

    # the rhs is the remainder of the string for now. that's right for the
    # final match and is narrowed by the next iteration otherwise
    found << Split.new(
      captures: match.captures,
      lhs: lhs,
      rhs: match.post_match,
      separator: separator
    )

    cursor = upto
  end

  found
end
|
|
228
332
|
|
|
229
|
-
#
|
|
230
|
-
# on
|
|
333
|
+
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
|
334
|
+
# on the action) the supplied positions
|
|
231
335
|
#
|
|
232
|
-
#
|
|
233
|
-
#
|
|
234
|
-
# # => ["f", "o", "o", "b", "a", "r", ""]
|
|
336
|
+
# positions are preprocessed to support negative indices, infinite ranges, and
|
|
337
|
+
# descending ranges, e.g.:
|
|
235
338
|
#
|
|
236
|
-
#
|
|
237
|
-
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
|
339
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
|
238
340
|
#
|
|
239
|
-
#
|
|
240
|
-
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
|
341
|
+
# translates to:
|
|
241
342
|
#
|
|
242
|
-
#
|
|
243
|
-
#
|
|
244
|
-
#
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
#
|
|
254
|
-
#
|
|
343
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
|
344
|
+
#
|
|
345
|
+
# and
|
|
346
|
+
#
|
|
347
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
|
|
348
|
+
#
|
|
349
|
+
# translates to:
|
|
350
|
+
#
|
|
351
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
|
|
352
|
+
#
|
|
353
|
+
# builds the lambda which accepts or rejects a split according to its 1-based
# position.
#
# positions: an Integer, Range or Set, or an array of these. negative integers
#            count back from the last split (-1); ranges may be beginless,
#            endless or descending
# action:    the value the lambda returns for a split whose position matches;
#            +!action+ is returned for every other split
# count:     the total number of splits, used to resolve negative and
#            open-ended positions
def compile(positions, action, count)
  # XXX note: we don't use modulo, because we don't want out-of-bounds indices
  # to silently work, e.g. we don't want:
  #
  #     ss.split("foo:bar:baz:quux", ":", at: -42)
  #
  # to mysteriously match when the index/position is 0/1
  resolve = ->(int) { int.negative? ? count + 1 + int : int }

  normalize = lambda do |range|
    first = range.begin
    last = range.end

    if last.nil?
      # an endless range has no end to exclude, so the translated range always
      # includes the final position, even when it's written with `...`. a
      # fully open range (nil..nil) covers every position
      Range.new(first.nil? ? 1 : resolve[first], count)
    elsif first.nil?
      Range.new(1, resolve[last], range.exclude_end?)
    elsif first.negative? || last.negative? || last < first
      # NOTE(review): for a descending exclusive range (e.g. 4...2) the bounds
      # are swapped, so the excluded value is the original begin rather than
      # the original end - confirm this is intended
      low, high = [resolve[first], resolve[last]].minmax
      Range.new(low, high, range.exclude_end?)
    else
      range
    end
  end

  # don't use Array(...) to wrap these as we don't want to convert ranges
  targets = (positions.is_a?(Array) ? positions : [positions]).map do |position|
    case position
    when Integer then resolve[position]
    when Range then normalize[position]
    when Set then position.map { |member| resolve[member] }.to_set
    else position
    end
  end

  lambda do |split|
    case split.position
    when *targets then action
    else !action
    end
  end
end
|
|
302
394
|
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
class StringSplitter
  # the details of a single delimiter match: the text on either side of it, the
  # separator itself, its captures and, once +update!+ has been called, where
  # the match sits in the list of splits
  class Split
    # +update!+ is only reachable through this refinement, so code which
    # doesn't activate it (e.g. a user-supplied block) can't call it
    #
    # idea based on a suggestion here (as an alternative to a `friend` modifier):
    # https://bugs.ruby-lang.org/issues/12962#note-5
    module Refinements
      refine Split do
        # record the split's place in the list, then freeze it
        def update!(count:, index:)
          @count = count
          @index = index
          @position = index + 1
          freeze
        end
      end
    end

    attr_reader :captures, :count, :index, :lhs, :position, :separator
    attr_accessor :rhs

    alias_method :pos, :position

    def initialize(captures:, lhs:, rhs:, separator:)
      @captures = captures
      @lhs = lhs
      @rhs = rhs
      @separator = separator
    end

    # 1-based position counted from the end of the array, e.g. for 5 items:
    #
    #   position | rposition
    #   ---------|----------
    #       1    |     5
    #       2    |     4
    #       3    |     3
    #       4    |     2
    #       5    |     1
    def rposition
      @count + 1 - @position
    end

    alias_method :rpos, :rposition

    # 0-based index counted from the end of the array, e.g. for 5 items:
    #
    #   index | rindex
    #   ------|-------
    #     0   |   4
    #     1   |   3
    #     2   |   2
    #     3   |   1
    #     4   |   0
    def rindex
      rposition - 1
    end
  end
end
|
metadata
CHANGED
|
@@ -1,71 +1,57 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: string_splitter
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.7.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- chocolateboy
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2020-08-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
|
-
- !ruby/object:Gem::Dependency
|
|
14
|
-
name: values
|
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
|
16
|
-
requirements:
|
|
17
|
-
- - "~>"
|
|
18
|
-
- !ruby/object:Gem::Version
|
|
19
|
-
version: '1.8'
|
|
20
|
-
type: :runtime
|
|
21
|
-
prerelease: false
|
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
-
requirements:
|
|
24
|
-
- - "~>"
|
|
25
|
-
- !ruby/object:Gem::Version
|
|
26
|
-
version: '1.8'
|
|
27
13
|
- !ruby/object:Gem::Dependency
|
|
28
14
|
name: bundler
|
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
|
30
16
|
requirements:
|
|
31
17
|
- - "~>"
|
|
32
18
|
- !ruby/object:Gem::Version
|
|
33
|
-
version: '1
|
|
19
|
+
version: '2.1'
|
|
34
20
|
type: :development
|
|
35
21
|
prerelease: false
|
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
23
|
requirements:
|
|
38
24
|
- - "~>"
|
|
39
25
|
- !ruby/object:Gem::Version
|
|
40
|
-
version: '1
|
|
26
|
+
version: '2.1'
|
|
41
27
|
- !ruby/object:Gem::Dependency
|
|
42
28
|
name: minitest
|
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
|
44
30
|
requirements:
|
|
45
31
|
- - "~>"
|
|
46
32
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: '5.
|
|
33
|
+
version: '5.0'
|
|
48
34
|
type: :development
|
|
49
35
|
prerelease: false
|
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
37
|
requirements:
|
|
52
38
|
- - "~>"
|
|
53
39
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: '5.
|
|
40
|
+
version: '5.0'
|
|
55
41
|
- !ruby/object:Gem::Dependency
|
|
56
42
|
name: minitest-power_assert
|
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
|
58
44
|
requirements:
|
|
59
45
|
- - "~>"
|
|
60
46
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 0.3
|
|
47
|
+
version: '0.3'
|
|
62
48
|
type: :development
|
|
63
49
|
prerelease: false
|
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
51
|
requirements:
|
|
66
52
|
- - "~>"
|
|
67
53
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: 0.3
|
|
54
|
+
version: '0.3'
|
|
69
55
|
- !ruby/object:Gem::Dependency
|
|
70
56
|
name: minitest-reporters
|
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -86,29 +72,15 @@ dependencies:
|
|
|
86
72
|
requirements:
|
|
87
73
|
- - "~>"
|
|
88
74
|
- !ruby/object:Gem::Version
|
|
89
|
-
version: '
|
|
90
|
-
type: :development
|
|
91
|
-
prerelease: false
|
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
-
requirements:
|
|
94
|
-
- - "~>"
|
|
95
|
-
- !ruby/object:Gem::Version
|
|
96
|
-
version: '10.0'
|
|
97
|
-
- !ruby/object:Gem::Dependency
|
|
98
|
-
name: rubocop
|
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
|
100
|
-
requirements:
|
|
101
|
-
- - "~>"
|
|
102
|
-
- !ruby/object:Gem::Version
|
|
103
|
-
version: 0.54.0
|
|
75
|
+
version: '13.0'
|
|
104
76
|
type: :development
|
|
105
77
|
prerelease: false
|
|
106
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
79
|
requirements:
|
|
108
80
|
- - "~>"
|
|
109
81
|
- !ruby/object:Gem::Version
|
|
110
|
-
version:
|
|
111
|
-
description:
|
|
82
|
+
version: '13.0'
|
|
83
|
+
description:
|
|
112
84
|
email: chocolate@cpan.org
|
|
113
85
|
executables: []
|
|
114
86
|
extensions: []
|
|
@@ -118,6 +90,7 @@ files:
|
|
|
118
90
|
- LICENSE.md
|
|
119
91
|
- README.md
|
|
120
92
|
- lib/string_splitter.rb
|
|
93
|
+
- lib/string_splitter/split.rb
|
|
121
94
|
- lib/string_splitter/version.rb
|
|
122
95
|
homepage: https://github.com/chocolateboy/string_splitter
|
|
123
96
|
licenses:
|
|
@@ -127,7 +100,7 @@ metadata:
|
|
|
127
100
|
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
|
128
101
|
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
|
129
102
|
source_code_uri: https://github.com/chocolateboy/string_splitter
|
|
130
|
-
post_install_message:
|
|
103
|
+
post_install_message:
|
|
131
104
|
rdoc_options: []
|
|
132
105
|
require_paths:
|
|
133
106
|
- lib
|
|
@@ -135,16 +108,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
135
108
|
requirements:
|
|
136
109
|
- - ">="
|
|
137
110
|
- !ruby/object:Gem::Version
|
|
138
|
-
version: '
|
|
111
|
+
version: '2.3'
|
|
139
112
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
113
|
requirements:
|
|
141
114
|
- - ">="
|
|
142
115
|
- !ruby/object:Gem::Version
|
|
143
116
|
version: '0'
|
|
144
117
|
requirements: []
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
signing_key:
|
|
118
|
+
rubygems_version: 3.1.4
|
|
119
|
+
signing_key:
|
|
148
120
|
specification_version: 4
|
|
149
121
|
summary: String#split on steroids
|
|
150
122
|
test_files: []
|