string_splitter 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +47 -10
- data/README.md +139 -49
- data/lib/string_splitter.rb +233 -181
- data/lib/string_splitter/version.rb +1 -1
- metadata +16 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d97ccb956fe51694359cdb0d3a997d6574de088bac6ed5a8e572f92bb5ed54a
|
4
|
+
data.tar.gz: 845cefeb5efd5d01baa45759cb05ff7ae5e9a457c1f148b340bb24c038bd259e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a935a6e0f3434801dcae6a32575779e1d2eb706f8f208087a208e7fdba39ac5b49928f8b7617aec60493a8db5988a013028650f8b2ced01fadb620bfd4c77e5
|
7
|
+
data.tar.gz: d76c18a283c1e113c8bffb73b813eb6074481faa7ea339811dc9a7424a5e24fdc3efbe9afa941459e566cde8271c3cd19a97e3a37a8cf90d36a65a7bf8fd6dcf
|
data/CHANGELOG.md
CHANGED
@@ -1,37 +1,74 @@
|
|
1
|
+
## 0.6.0 - 2020-08-20
|
2
|
+
|
3
|
+
#### Breaking Changes
|
4
|
+
|
5
|
+
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
6
|
+
unlike Ruby's `String#split` (but like Crystal's), the former no longer
|
7
|
+
strips the string before splitting
|
8
|
+
- rename the `remove_empty` option `remove_empty_fields`
|
9
|
+
- rename the `exclude` option `except` (alias for `reject`)
|
10
|
+
|
11
|
+
#### Fixes
|
12
|
+
|
13
|
+
- correctly handle backreferences in delimiter patterns
|
14
|
+
|
15
|
+
#### Features
|
16
|
+
|
17
|
+
- add support for descending, negative, and infinite ranges,
|
18
|
+
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
19
|
+
|
1
20
|
## 0.5.1 - 2018-07-01
|
2
21
|
|
22
|
+
#### Changes
|
23
|
+
|
3
24
|
- set StringSplitter::VERSION when `string_splitter.rb` is loaded
|
4
|
-
- doc tweaks
|
5
25
|
|
6
26
|
## 0.5.0 - 2018-06-26
|
7
27
|
|
28
|
+
#### Fixes
|
29
|
+
|
8
30
|
- don't treat string delimiters as patterns
|
31
|
+
|
32
|
+
#### Features
|
33
|
+
|
9
34
|
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
10
35
|
- add a `select` alias for `at`
|
11
36
|
|
12
37
|
## 0.4.0 - 2018-06-24
|
13
38
|
|
14
|
-
|
39
|
+
#### Breaking Changes
|
40
|
+
|
41
|
+
- remove the `offset` alias for `split.index`
|
15
42
|
|
16
43
|
## 0.3.1 - 2018-06-24
|
17
44
|
|
18
|
-
|
45
|
+
#### Fixes
|
46
|
+
|
47
|
+
- remove trailing empty field when the separator is empty
|
48
|
+
([#1](https://github.com/chocolateboy/string_splitter/issues/1))
|
19
49
|
|
20
50
|
## 0.3.0 - 2018-06-23
|
21
51
|
|
22
|
-
|
23
|
-
|
24
|
-
|
52
|
+
#### Breaking Changes
|
53
|
+
|
54
|
+
- rename the `default_separator` option `default_delimiter`
|
25
55
|
|
26
56
|
## 0.2.0 - 2018-06-22
|
27
57
|
|
28
|
-
|
29
|
-
|
58
|
+
#### Breaking Changes
|
59
|
+
|
60
|
+
- make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
|
61
|
+
1-based accessor
|
30
62
|
|
31
63
|
## 0.1.0 - 2018-06-22
|
32
64
|
|
33
|
-
|
34
|
-
|
65
|
+
#### Breaking Changes
|
66
|
+
|
67
|
+
- the block now takes a single `split` object with an `index` accessor, rather
|
68
|
+
than separate `index` and `split` arguments
|
69
|
+
|
70
|
+
#### Features
|
71
|
+
|
35
72
|
- add support for negative indices in the value supplied to the `at` option
|
36
73
|
- add a `count` field to the split object containing the total number of splits
|
37
74
|
|
data/README.md
CHANGED
@@ -3,14 +3,15 @@
|
|
3
3
|
[](https://travis-ci.org/chocolateboy/string_splitter)
|
4
4
|
[](https://rubygems.org/gems/string_splitter)
|
5
5
|
|
6
|
-
<!--
|
7
|
-
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
6
|
+
<!-- toc -->
|
8
7
|
|
9
8
|
- [NAME](#name)
|
10
9
|
- [INSTALLATION](#installation)
|
11
10
|
- [SYNOPSIS](#synopsis)
|
12
11
|
- [DESCRIPTION](#description)
|
13
12
|
- [WHY?](#why)
|
13
|
+
- [CAVEATS](#caveats)
|
14
|
+
- [Differences from String#split](#differences-from-string%23split)
|
14
15
|
- [COMPATIBILITY](#compatibility)
|
15
16
|
- [VERSION](#version)
|
16
17
|
- [SEE ALSO](#see-also)
|
@@ -19,7 +20,7 @@
|
|
19
20
|
- [AUTHOR](#author)
|
20
21
|
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
21
22
|
|
22
|
-
<!--
|
23
|
+
<!-- tocstop -->
|
23
24
|
|
24
25
|
# NAME
|
25
26
|
|
@@ -42,16 +43,28 @@ ss = StringSplitter.new
|
|
42
43
|
**Same as `String#split`**
|
43
44
|
|
44
45
|
```ruby
|
45
|
-
ss.split("foo bar baz
|
46
|
-
ss.split("foo bar baz
|
47
|
-
|
48
|
-
|
46
|
+
ss.split("foo bar baz")
|
47
|
+
ss.split(" foo bar baz ")
|
48
|
+
# => ["foo", "bar", "baz"]
|
49
|
+
```
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
ss.split("foo", "")
|
53
|
+
ss.split("foo", //)
|
54
|
+
# => ["f", "o", "o"]
|
55
|
+
```
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
ss.split("", "...")
|
59
|
+
ss.split("", /.../)
|
60
|
+
# => []
|
49
61
|
```
|
50
62
|
|
51
63
|
**Split at the first delimiter**
|
52
64
|
|
53
65
|
```ruby
|
54
66
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
67
|
+
ss.split("foo:bar:baz:quux", ":", select: 1)
|
55
68
|
# => ["foo", "bar:baz:quux"]
|
56
69
|
```
|
57
70
|
|
@@ -65,8 +78,16 @@ ss.split("foo:bar:baz:quux", ":", at: -1)
|
|
65
78
|
**Split at multiple delimiter positions**
|
66
79
|
|
67
80
|
```ruby
|
68
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -
|
69
|
-
# => ["1", "2", "3", "4:5:6:7", "
|
81
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
82
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
83
|
+
```
|
84
|
+
|
85
|
+
**Split at all but the first and last delimiters**
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
ss.split("1:2:3:4:5:6", ":", except: [1, -1])
|
89
|
+
ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
|
90
|
+
# => ["1:2", "3", "4", "5:6"]
|
70
91
|
```
|
71
92
|
|
72
93
|
**Split from the right**
|
@@ -75,44 +96,79 @@ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
|
75
96
|
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
76
97
|
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
77
98
|
```
|
99
|
+
|
100
|
+
**Split with negative, descending, and infinite ranges**
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [4...])
|
105
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
106
|
+
```
|
107
|
+
|
108
|
+
```ruby
|
109
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
110
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [..-3])
|
111
|
+
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
112
|
+
```
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
116
|
+
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
117
|
+
```
|
118
|
+
|
78
119
|
**Full control via a block**
|
79
120
|
|
80
121
|
```ruby
|
81
|
-
result = ss.split(
|
82
|
-
split.
|
122
|
+
result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
|
123
|
+
split.pos % 2 == 0
|
83
124
|
end
|
84
|
-
# => ["
|
125
|
+
# => ["1:2", "3:4", "5:6", "7:8"]
|
126
|
+
```
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
string = "banana".chars.sort.join # "aaabnn"
|
130
|
+
|
131
|
+
ss.split(string, "") do |split|
|
132
|
+
split.rhs != split.lhs
|
133
|
+
end
|
134
|
+
# => ["aaa", "b", "nn"]
|
85
135
|
```
|
86
136
|
|
87
137
|
# DESCRIPTION
|
88
138
|
|
89
|
-
Many languages have built-in `split` functions/methods for strings. They behave
|
90
|
-
(notwithstanding the occasional
|
91
|
-
and
|
139
|
+
Many languages have built-in `split` functions/methods for strings. They behave
|
140
|
+
similarly (notwithstanding the occasional
|
141
|
+
[surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
|
142
|
+
handle a few common cases e.g.:
|
92
143
|
|
93
144
|
* limiting the number of splits
|
94
145
|
* including the separator(s) in the results
|
95
146
|
* removing (some) empty fields
|
96
147
|
|
97
|
-
But, because the API is squeezed into two overloaded parameters (the delimiter
|
98
|
-
achieving the desired results can be tricky. For instance,
|
99
|
-
trailing fields (by default), it provides no
|
100
|
-
|
101
|
-
to
|
148
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter
|
149
|
+
and the limit), achieving the desired results can be tricky. For instance,
|
150
|
+
while `String#split` removes empty trailing fields (by default), it provides no
|
151
|
+
way to remove *all* empty fields. Likewise, the cramped API means there's no
|
152
|
+
way to e.g. combine a limit (positive integer) with the option to preserve
|
153
|
+
empty fields (negative integer), or use backreferences in a delimiter pattern
|
102
154
|
without including its captured subexpressions in the result.
|
103
155
|
|
104
|
-
If `split` was being written from scratch, without the baggage of its legacy
|
105
|
-
it's possible that some of these options would be made explicit rather
|
106
|
-
the parameters. And, indeed, this is possible in some
|
107
|
-
e.g. in Crystal:
|
156
|
+
If `split` was being written from scratch, without the baggage of its legacy
|
157
|
+
API, it's possible that some of these options would be made explicit rather
|
158
|
+
than overloading the parameters. And, indeed, this is possible in some
|
159
|
+
implementations, e.g. in Crystal:
|
108
160
|
|
109
161
|
```ruby
|
110
|
-
":foo:bar:baz:".split(":", remove_empty: false)
|
111
|
-
|
162
|
+
":foo:bar:baz:".split(":", remove_empty: false)
|
163
|
+
# => ["", "foo", "bar", "baz", ""]
|
164
|
+
|
165
|
+
":foo:bar:baz:".split(":", remove_empty: true)
|
166
|
+
# => ["foo", "bar", "baz"]
|
112
167
|
````
|
113
168
|
|
114
|
-
StringSplitter takes this one step further by moving the configuration out of
|
115
|
-
and delegating the strategy — i.e. which splits should be
|
169
|
+
StringSplitter takes this one step further by moving the configuration out of
|
170
|
+
the method altogether and delegating the strategy — i.e. which splits should be
|
171
|
+
accepted or rejected — to a block:
|
116
172
|
|
117
173
|
```ruby
|
118
174
|
ss = StringSplitter.new
|
@@ -120,22 +176,28 @@ ss = StringSplitter.new
|
|
120
176
|
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
121
177
|
# => ["foo", "bar:baz"]
|
122
178
|
|
123
|
-
ss.split("foo:bar:baz", ":")
|
124
|
-
|
179
|
+
ss.split("foo:bar:baz:quux", ":") do |split|
|
180
|
+
split.position == 1 || split.position == 3
|
181
|
+
end
|
182
|
+
# => ["foo", "bar:baz", "quux"]
|
125
183
|
```
|
126
184
|
|
127
|
-
As a shortcut, the common case of splitting on delimiters at one or more
|
185
|
+
As a shortcut, the common case of splitting on delimiters at one or more
|
186
|
+
positions is supported by an option:
|
128
187
|
|
129
188
|
```ruby
|
130
|
-
ss.split(
|
189
|
+
ss.split("foo:bar:baz:quux", ":", at: [1, -1])
|
190
|
+
# => ["foo", "bar:baz", "quux"]
|
131
191
|
```
|
132
192
|
|
133
193
|
# WHY?
|
134
194
|
|
135
|
-
I wanted to split semi-structured output into fields without having to resort
|
195
|
+
I wanted to split semi-structured output into fields without having to resort
|
196
|
+
to a regex or a full-blown parser.
|
136
197
|
|
137
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
138
|
-
that's tantalizingly close to being
|
198
|
+
As an example, the nominally unstructured output of many Unix commands is often
|
199
|
+
formatted in a way that's tantalizingly close to being
|
200
|
+
[machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
|
139
201
|
apart from a few pesky exceptions e.g.:
|
140
202
|
|
141
203
|
```bash
|
@@ -148,8 +210,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
148
210
|
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
149
211
|
```
|
150
212
|
|
151
|
-
These lines can *almost* be parsed into an array of fields by splitting them on
|
152
|
-
date (columns 6-8) i.e.:
|
213
|
+
These lines can *almost* be parsed into an array of fields by splitting them on
|
214
|
+
whitespace. The exception is the date (columns 6-8) i.e.:
|
153
215
|
|
154
216
|
```ruby
|
155
217
|
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
@@ -174,13 +236,14 @@ One way to work around this is to parse the whole line e.g.:
|
|
174
236
|
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
175
237
|
```
|
176
238
|
|
177
|
-
But that requires us to specify *everything*. What we really want is a version
|
178
|
-
which allows us to veto splitting for the 6th and 7th delimiters
|
179
|
-
|
180
|
-
|
239
|
+
But that requires us to specify *everything*. What we really want is a version
|
240
|
+
of `split` which allows us to veto splitting for the 6th and 7th delimiters
|
241
|
+
(and to stop after the 8th delimiter) i.e. control over which splits are
|
242
|
+
accepted, rather than being restricted to the single, baked-in strategy
|
243
|
+
provided by the `limit` parameter.
|
181
244
|
|
182
|
-
By providing a simple way to accept or reject each split, StringSplitter makes
|
183
|
-
this easy to handle, either via a block:
|
245
|
+
By providing a simple way to accept or reject each split, StringSplitter makes
|
246
|
+
cases like this easy to handle, either via a block:
|
184
247
|
|
185
248
|
```ruby
|
186
249
|
ss.split(line) do |split|
|
@@ -196,14 +259,42 @@ ss.split(line, at: [1..5, 8])
|
|
196
259
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
197
260
|
```
|
198
261
|
|
262
|
+
# CAVEATS
|
263
|
+
|
264
|
+
## Differences from String#split
|
265
|
+
|
266
|
+
StringSplitter shares `String#split`'s behavior of trimming the string before
|
267
|
+
splitting if the delimiter is omitted, e.g.:
|
268
|
+
|
269
|
+
```ruby
|
270
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
271
|
+
ss.split(" foo bar baz ") # => ["foo", "bar", "baz"]
|
272
|
+
```
|
273
|
+
|
274
|
+
However, unlike `String#split`, this doesn't also apply if a delimiter of `" "`
|
275
|
+
is supplied, e.g.:
|
276
|
+
|
277
|
+
```ruby
|
278
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
279
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
280
|
+
```
|
281
|
+
|
282
|
+
It also doesn't apply if a custom default-delimiter is defined:
|
283
|
+
|
284
|
+
```ruby
|
285
|
+
ss = StringSplitter.new(default_delimiter: /\s+/)
|
286
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
287
|
+
```
|
288
|
+
|
199
289
|
# COMPATIBILITY
|
200
290
|
|
201
|
-
StringSplitter is tested and supported on all versions of Ruby [supported by
|
202
|
-
|
291
|
+
StringSplitter is tested and supported on all versions of Ruby [supported by
|
292
|
+
the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
|
293
|
+
currently, Ruby 2.5 and above.
|
203
294
|
|
204
295
|
# VERSION
|
205
296
|
|
206
|
-
0.
|
297
|
+
0.6.0
|
207
298
|
|
208
299
|
# SEE ALSO
|
209
300
|
|
@@ -221,8 +312,7 @@ i.e., currently, Ruby 2.3 and above.
|
|
221
312
|
|
222
313
|
# COPYRIGHT AND LICENSE
|
223
314
|
|
224
|
-
Copyright © 2018 by chocolateboy.
|
315
|
+
Copyright © 2018-2020 by chocolateboy.
|
225
316
|
|
226
317
|
This is free software; you can redistribute it and/or modify it under the
|
227
|
-
terms of the [Artistic License 2.0](
|
228
|
-
|
318
|
+
terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
|
data/lib/string_splitter.rb
CHANGED
@@ -1,21 +1,45 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'set'
|
3
4
|
require 'values'
|
4
5
|
require_relative 'string_splitter/version'
|
5
6
|
|
6
7
|
# This class extends the functionality of +String#split+ by:
|
7
8
|
#
|
8
9
|
# - providing full control over which splits are accepted or rejected
|
10
|
+
#
|
9
11
|
# - adding support for splitting from right-to-left
|
12
|
+
#
|
10
13
|
# - encapsulating splitting options/preferences in the splitter rather
|
11
14
|
# than trying to cram them into overloaded method parameters
|
12
15
|
#
|
13
16
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
14
|
-
# guns e.g. regex matching or parsing.
|
17
|
+
# guns, e.g. regex matching or parsing.
|
18
|
+
#
|
19
|
+
# Implementation-wise, we effectively use the built-in +String#split+ method as a
|
20
|
+
# tokenizer, and parse the resulting tokens into an array of Split objects with the
|
21
|
+
# following fields:
|
22
|
+
#
|
23
|
+
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
24
|
+
# - count: the number of splits
|
25
|
+
# - index: the 0-based index of the split in the array
|
26
|
+
# - lhs: the string to the left of the separator (back to the previous split candidate)
|
27
|
+
# - position: the 1-based index of the split in the array (alias: pos)
|
28
|
+
# - rhs: the string to the right of the separator (up to the next split candidate)
|
29
|
+
# - rindex: the 0-based index of the split relative to the end of the array
|
30
|
+
# - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
|
31
|
+
# - separator: the string matched by the delimiter pattern/string
|
32
|
+
#
|
15
33
|
class StringSplitter
|
34
|
+
# terminology: the delimiter is what we provide and the separators are what we get
|
35
|
+
# back (if we capture them). e.g. for:
|
36
|
+
#
|
37
|
+
# ss.split("foo:bar::baz", /(\W+)/)
|
38
|
+
#
|
39
|
+
# the delimiter is /(\W+)/ and the separators are ":" and "::"
|
40
|
+
|
16
41
|
ACCEPT_ALL = ->(_split) { true }
|
17
|
-
DEFAULT_DELIMITER = /\s
|
18
|
-
NO_SPLITS = []
|
42
|
+
DEFAULT_DELIMITER = /\s+/.freeze
|
19
43
|
|
20
44
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
21
45
|
def position
|
@@ -23,32 +47,78 @@ class StringSplitter
|
|
23
47
|
end
|
24
48
|
|
25
49
|
alias_method :pos, :position
|
50
|
+
|
51
|
+
# 0-based index relative to the end of the array, e.g. for 5 items:
|
52
|
+
#
|
53
|
+
# index | rindex
|
54
|
+
# ------|-------
|
55
|
+
# 0 | 4
|
56
|
+
# 1 | 3
|
57
|
+
# 2 | 2
|
58
|
+
# 3 | 1
|
59
|
+
# 4 | 0
|
60
|
+
def rindex
|
61
|
+
count - position
|
62
|
+
end
|
63
|
+
|
64
|
+
# 1-based position relative to the end of the array, e.g. for 5 items:
|
65
|
+
#
|
66
|
+
# position | rposition
|
67
|
+
# ----------|----------
|
68
|
+
# 1 | 5
|
69
|
+
# 2 | 4
|
70
|
+
# 3 | 3
|
71
|
+
# 4 | 2
|
72
|
+
# 5 | 1
|
73
|
+
def rposition
|
74
|
+
count + 1 - position
|
75
|
+
end
|
76
|
+
|
77
|
+
alias_method :rpos, :rposition
|
78
|
+
end
|
79
|
+
|
80
|
+
# simulate an enum. the value is returned by the case statement
|
81
|
+
# in the generated block if the positions match
|
82
|
+
module Action
|
83
|
+
SELECT = true
|
84
|
+
REJECT = false
|
26
85
|
end
|
27
86
|
|
87
|
+
private_constant :Action
|
88
|
+
|
28
89
|
def initialize(
|
29
90
|
default_delimiter: DEFAULT_DELIMITER,
|
30
91
|
include_captures: true,
|
31
|
-
remove_empty: false,
|
92
|
+
remove_empty: false, # TODO remove this
|
93
|
+
remove_empty_fields: remove_empty,
|
32
94
|
spread_captures: true
|
33
95
|
)
|
34
96
|
@default_delimiter = default_delimiter
|
35
97
|
@include_captures = include_captures
|
36
|
-
@
|
98
|
+
@remove_empty_fields = remove_empty_fields
|
37
99
|
@spread_captures = spread_captures
|
38
100
|
end
|
39
101
|
|
40
|
-
attr_reader
|
102
|
+
attr_reader(
|
103
|
+
:default_delimiter,
|
104
|
+
:include_captures,
|
105
|
+
:remove_empty_fields,
|
106
|
+
:spread_captures
|
107
|
+
)
|
108
|
+
|
109
|
+
# TODO remove this
|
110
|
+
alias remove_empty remove_empty_fields
|
41
111
|
|
42
112
|
def split(
|
43
113
|
string,
|
44
114
|
delimiter = @default_delimiter,
|
45
|
-
at: nil,
|
115
|
+
at: nil, # alias for select
|
116
|
+
except: nil, # alias for reject
|
46
117
|
select: at,
|
47
|
-
|
48
|
-
reject: exclude,
|
118
|
+
reject: except,
|
49
119
|
&block
|
50
120
|
)
|
51
|
-
result, splits,
|
121
|
+
result, splits, count, accept = init(
|
52
122
|
string: string,
|
53
123
|
delimiter: delimiter,
|
54
124
|
select: select,
|
@@ -56,29 +126,21 @@ class StringSplitter
|
|
56
126
|
block: block
|
57
127
|
)
|
58
128
|
|
59
|
-
|
129
|
+
return result unless splits
|
60
130
|
|
61
|
-
splits.each_with_index do |
|
62
|
-
split = Split.with(
|
131
|
+
splits.each_with_index do |hash, index|
|
132
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
63
133
|
result << split.lhs if result.empty?
|
64
134
|
|
65
|
-
if
|
66
|
-
|
67
|
-
if @spread_captures
|
68
|
-
result += split.captures
|
69
|
-
else
|
70
|
-
result << split.captures
|
71
|
-
end
|
72
|
-
end
|
73
|
-
|
74
|
-
result << split.rhs
|
135
|
+
if accept.call(split)
|
136
|
+
result << split.captures << split.rhs
|
75
137
|
else
|
76
|
-
#
|
138
|
+
# append the rhs
|
77
139
|
result[-1] = result[-1] + split.separator + split.rhs
|
78
140
|
end
|
79
141
|
end
|
80
142
|
|
81
|
-
result
|
143
|
+
render(result)
|
82
144
|
end
|
83
145
|
|
84
146
|
alias lsplit split
|
@@ -86,13 +148,13 @@ class StringSplitter
|
|
86
148
|
def rsplit(
|
87
149
|
string,
|
88
150
|
delimiter = @default_delimiter,
|
89
|
-
at: nil,
|
151
|
+
at: nil, # alias for select
|
152
|
+
except: nil, # alias for reject
|
90
153
|
select: at,
|
91
|
-
|
92
|
-
reject: exclude,
|
154
|
+
reject: except,
|
93
155
|
&block
|
94
156
|
)
|
95
|
-
result, splits,
|
157
|
+
result, splits, count, accept = init(
|
96
158
|
string: string,
|
97
159
|
delimiter: delimiter,
|
98
160
|
select: select,
|
@@ -100,203 +162,193 @@ class StringSplitter
|
|
100
162
|
block: block
|
101
163
|
)
|
102
164
|
|
103
|
-
|
165
|
+
return result unless splits
|
104
166
|
|
105
|
-
splits.
|
106
|
-
split = Split.with(
|
167
|
+
splits.reverse_each.with_index do |hash, index|
|
168
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
107
169
|
result.unshift(split.rhs) if result.empty?
|
108
170
|
|
109
|
-
if
|
110
|
-
|
111
|
-
|
112
|
-
result = split.captures + result
|
113
|
-
else
|
114
|
-
result.unshift(split.captures)
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
result.unshift(split.lhs)
|
171
|
+
if accept.call(split)
|
172
|
+
# [lhs + captures] + result
|
173
|
+
result.unshift(split.lhs, split.captures)
|
119
174
|
else
|
120
175
|
# prepend the lhs
|
121
176
|
result[0] = split.lhs + split.separator + result[0]
|
122
177
|
end
|
123
178
|
end
|
124
179
|
|
125
|
-
result
|
180
|
+
render(result)
|
126
181
|
end
|
127
182
|
|
128
183
|
private
|
129
184
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
end
|
185
|
+
# initialisation common to +split+ and +rsplit+
|
186
|
+
#
|
187
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns an array with
|
188
|
+
# the following fields:
|
189
|
+
#
|
190
|
+
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
191
|
+
# if the splits array is empty, the caller returns this array immediately
|
192
|
+
# without any further processing
|
193
|
+
#
|
194
|
+
# - splits: an array of hashes containing the lhs, rhs, separator and captured
|
195
|
+
# separator substrings for each split
|
196
|
+
#
|
197
|
+
# - count: the number of splits
|
198
|
+
#
|
199
|
+
# - accept: a proc whose return value determines whether each split should be
|
200
|
+
# accepted (true) or rejected (false)
|
201
|
+
#
|
202
|
+
def init(string:, delimiter:, select:, reject:, block:)
|
203
|
+
if delimiter.equal?(DEFAULT_DELIMITER)
|
204
|
+
string = string.strip
|
205
|
+
end
|
152
206
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
207
|
+
if reject
|
208
|
+
positions = reject
|
209
|
+
action = Action::REJECT
|
210
|
+
elsif select
|
211
|
+
positions = select
|
212
|
+
action = Action::SELECT
|
159
213
|
end
|
160
214
|
|
161
|
-
|
162
|
-
end
|
215
|
+
splits = parse(string, delimiter)
|
163
216
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
#
|
168
|
-
# triple, where `result` is the return value of the method, `splits` is an array
|
169
|
-
# of hashes containing the lhs/rhs, separator and captures of each split, and
|
170
|
-
# `block` is a proc which specifies whether each split should be accepted or
|
171
|
-
# rejected
|
172
|
-
def split_init(string:, delimiter:, select:, reject:, block:)
|
173
|
-
unless (match = string.match(delimiter))
|
174
|
-
result = (@remove_empty && string.empty?) ? [] : [string]
|
175
|
-
return [result, NO_SPLITS, block]
|
217
|
+
if splits.empty?
|
218
|
+
result = string.empty? ? [] : [string]
|
219
|
+
return [result]
|
176
220
|
end
|
177
221
|
|
178
|
-
|
179
|
-
|
222
|
+
block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
|
223
|
+
[[], splits, splits.length, block]
|
224
|
+
end
|
180
225
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
elsif !select.empty?
|
185
|
-
positions = select
|
186
|
-
action = :select
|
226
|
+
def render(result)
|
227
|
+
if @remove_empty_fields
|
228
|
+
result.reject! { |it| it.is_a?(String) && it.empty? }
|
187
229
|
end
|
188
230
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
parts = string.split(/(#{delimiter})/, -1)
|
193
|
-
remove_trailing_empty_field!(parts, ncaptures)
|
194
|
-
result, splits = splits_for(parts, ncaptures)
|
195
|
-
block ||= positions ? match_positions(positions, action, splits.length) : ACCEPT_ALL
|
231
|
+
unless @include_captures
|
232
|
+
return result.reject! { |it| it.is_a?(Array) }
|
233
|
+
end
|
196
234
|
|
197
|
-
|
235
|
+
result.flat_map do |value|
|
236
|
+
next [value] unless value.is_a?(Array) && @spread_captures
|
237
|
+
@spread_captures == :compact ? value.compact : value
|
238
|
+
end
|
198
239
|
end
|
199
240
|
|
200
|
-
#
|
201
|
-
#
|
202
|
-
#
|
203
|
-
# e.g. to split on:
|
241
|
+
# takes a string and a delimiter pattern (regex or string) and splits it along
|
242
|
+
# the delimiter, returning an array of objects (hashes) representing each split.
|
243
|
+
# e.g. for:
|
204
244
|
#
|
205
|
-
#
|
206
|
-
# - <bar-comment> ... </bar-comment>
|
245
|
+
# parse("foo:bar:baz:quux", ":")
|
207
246
|
#
|
208
|
-
#
|
247
|
+
# we return:
|
209
248
|
#
|
210
|
-
#
|
249
|
+
# [
|
250
|
+
# { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
|
251
|
+
# { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
|
252
|
+
# { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
|
253
|
+
# ]
|
211
254
|
#
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
#
|
216
|
-
# %r| ( <(\w+-comment)> [^<]* </\2-comment> ) |x
|
255
|
+
def parse(string, pattern)
|
256
|
+
result = []
|
257
|
+
start = 0
|
217
258
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
259
|
+
# we don't use the argument passed to the +scan+ block here because it's a
|
260
|
+
# string (the separator) if there are no captures, rather than an empty
|
261
|
+
# array. we use match.captures instead to get the array
|
262
|
+
string.scan(pattern) do
|
263
|
+
match = Regexp.last_match
|
264
|
+
index, after = match.offset(0)
|
265
|
+
separator = match[0]
|
266
|
+
|
267
|
+
# ignore empty separators at the beginning and/or end of the string
|
268
|
+
next if separator.empty? && (index.zero? || after == string.length)
|
269
|
+
|
270
|
+
lhs = string.slice(start, index - start)
|
271
|
+
result.last[:rhs] = lhs unless result.empty?
|
272
|
+
|
273
|
+
# this is correct for the last/only match, but gets updated to the next
|
274
|
+
# match's lhs for other matches
|
275
|
+
rhs = match.post_match
|
276
|
+
|
277
|
+
result << {
|
278
|
+
captures: match.captures,
|
279
|
+
lhs: lhs,
|
280
|
+
rhs: rhs,
|
281
|
+
separator: separator,
|
282
|
+
}
|
283
|
+
|
284
|
+
# move the start index (the start of the lhs) to the index after the last
|
285
|
+
# character of the separator
|
286
|
+
start = after
|
224
287
|
end
|
225
288
|
|
226
|
-
|
289
|
+
result
|
227
290
|
end
|
228
291
|
|
229
|
-
#
|
230
|
-
# on
|
292
|
+
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
293
|
+
# on the action) the supplied positions
|
231
294
|
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
# # => ["f", "o", "o", "b", "a", "r", ""]
|
295
|
+
# positions are preprocessed to support an additional feature: negative indices
|
296
|
+
# are translated to 1-based non-negative indices, e.g.:
|
235
297
|
#
|
236
|
-
#
|
237
|
-
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
298
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
238
299
|
#
|
239
|
-
#
|
240
|
-
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
300
|
+
# translates to:
|
241
301
|
#
|
242
|
-
#
|
243
|
-
#
|
244
|
-
#
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
#
|
302
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
303
|
+
#
|
304
|
+
# and
|
305
|
+
#
|
306
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
|
307
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: [-3..])
|
308
|
+
#
|
309
|
+
# translate to:
|
310
|
+
#
|
311
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
|
312
|
+
#
|
313
|
+
def compile(positions, action, nsplits)
|
314
|
+
# XXX note: we don't use modulo, because we don't want
|
315
|
+
# out-of-bounds indices to silently work, e.g. we don't want:
|
255
316
|
#
|
256
|
-
#
|
317
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
257
318
|
#
|
258
|
-
#
|
259
|
-
# + ncaptures
|
260
|
-
# + 1 (separator)
|
319
|
+
# to mysteriously match when the index/position is 0/1
|
261
320
|
#
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
# to mysteriously match when the position is 2
|
289
|
-
|
290
|
-
nsplits + 1 + position
|
321
|
+
resolve = ->(int) { int.negative? ? nsplits + 1 + int : int }
|
322
|
+
|
323
|
+
# don't use Array(...) to wrap these as we don't want to convert ranges
|
324
|
+
positions = positions.is_a?(Array) ? positions : [positions]
|
325
|
+
|
326
|
+
positions = positions.map do |position|
|
327
|
+
if position.is_a?(Integer)
|
328
|
+
resolve[position]
|
329
|
+
elsif position.is_a?(Range)
|
330
|
+
rbegin = position.begin
|
331
|
+
rend = position.end
|
332
|
+
rexc = position.exclude_end?
|
333
|
+
|
334
|
+
if rbegin.nil?
|
335
|
+
Range.new(1, resolve[rend], rexc)
|
336
|
+
elsif rend.nil?
|
337
|
+
Range.new(resolve[rbegin], nsplits, rexc)
|
338
|
+
elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
|
339
|
+
from = resolve[rbegin]
|
340
|
+
to = resolve[rend]
|
341
|
+
to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
|
342
|
+
else
|
343
|
+
position
|
344
|
+
end
|
345
|
+
elsif position.is_a?(Set)
|
346
|
+
position.map { |it| resolve[it] }.to_set
|
291
347
|
else
|
292
348
|
position
|
293
349
|
end
|
294
350
|
end
|
295
351
|
|
296
|
-
|
297
|
-
|
298
|
-
lambda do |split|
|
299
|
-
case split.position when *positions then match else !match end
|
300
|
-
end
|
352
|
+
->(split) { case split.position when *positions then action else !action end }
|
301
353
|
end
|
302
354
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|
@@ -30,42 +30,42 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1
|
33
|
+
version: '2.1'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '5.
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '5.
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest-power_assert
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.3
|
61
|
+
version: '0.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.3
|
68
|
+
version: '0.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest-reporters
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,29 +86,15 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '13.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
-
|
98
|
-
name: rubocop
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: 0.54.0
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: 0.54.0
|
111
|
-
description:
|
96
|
+
version: '13.0'
|
97
|
+
description:
|
112
98
|
email: chocolate@cpan.org
|
113
99
|
executables: []
|
114
100
|
extensions: []
|
@@ -127,7 +113,7 @@ metadata:
|
|
127
113
|
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
128
114
|
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
129
115
|
source_code_uri: https://github.com/chocolateboy/string_splitter
|
130
|
-
post_install_message:
|
116
|
+
post_install_message:
|
131
117
|
rdoc_options: []
|
132
118
|
require_paths:
|
133
119
|
- lib
|
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
121
|
requirements:
|
136
122
|
- - ">="
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '2.3'
|
139
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
126
|
requirements:
|
141
127
|
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
129
|
version: '0'
|
144
130
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
131
|
+
rubygems_version: 3.1.4
|
132
|
+
signing_key:
|
148
133
|
specification_version: 4
|
149
134
|
summary: String#split on steroids
|
150
135
|
test_files: []
|