string_splitter 0.3.1 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +67 -8
- data/README.md +171 -53
- data/lib/string_splitter.rb +272 -163
- data/lib/string_splitter/version.rb +1 -1
- metadata +16 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 400534de6c3143ef81b2ad46a3a6432b7d83ef0900024ebdde3f06a4e1714890
|
4
|
+
data.tar.gz: 643f5af7b9e13321dfa97b045b124d0c5ea576868b13141c264122bc96baea5e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 35bed8fe69b33314813fbd68a8da0e8f4799b7891275ac601b157caeb0e0a3780f37ec7e7876d808b8dfcbfdf7527f45c3af0dc0d679e133865e96949a1d9ce3
|
7
|
+
data.tar.gz: 8186e40d57654daf1a481ab74c128910f7aa346bc343a0a9933dc39b7cceeb204c1a55ac39b39321df46f7d02420fd87f93dd4a708be0a985d94833df018da87
|
data/CHANGELOG.md
CHANGED
@@ -1,22 +1,81 @@
|
|
1
|
+
## 0.7.0 - 2020-08-21
|
2
|
+
|
3
|
+
#### Breaking Changes
|
4
|
+
|
5
|
+
- `String#split` incompatibility: we no longer trim the string (with
|
6
|
+
`String#strip`) before splitting if the delimiter is omitted
|
7
|
+
|
8
|
+
## 0.6.0 - 2020-08-20
|
9
|
+
|
10
|
+
#### Breaking Changes
|
11
|
+
|
12
|
+
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
13
|
+
unlike Ruby's `String#split`, the former no longer strips the string before
|
14
|
+
splitting
|
15
|
+
- rename the `remove_empty` option `remove_empty_fields`
|
16
|
+
- rename the `exclude` option `except` (alias for `reject`)
|
17
|
+
|
18
|
+
#### Features
|
19
|
+
|
20
|
+
- add support for descending, negative, and infinite ranges,
|
21
|
+
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
22
|
+
|
23
|
+
#### Fixes
|
24
|
+
|
25
|
+
- correctly handle backreferences in delimiter patterns
|
26
|
+
|
27
|
+
## 0.5.1 - 2018-07-01
|
28
|
+
|
29
|
+
#### Changes
|
30
|
+
|
31
|
+
- set StringSplitter::VERSION when `string_splitter.rb` is loaded
|
32
|
+
|
33
|
+
## 0.5.0 - 2018-06-26
|
34
|
+
|
35
|
+
#### Features
|
36
|
+
|
37
|
+
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
38
|
+
- add a `select` alias for `at`
|
39
|
+
|
40
|
+
#### Fixes
|
41
|
+
|
42
|
+
- don't treat string delimiters as patterns
|
43
|
+
|
44
|
+
## 0.4.0 - 2018-06-24
|
45
|
+
|
46
|
+
#### Breaking Changes
|
47
|
+
|
48
|
+
- remove the `offset` alias for `split.index`
|
49
|
+
|
1
50
|
## 0.3.1 - 2018-06-24
|
2
51
|
|
3
|
-
|
52
|
+
#### Fixes
|
53
|
+
|
54
|
+
- remove trailing empty field when the separator is empty
|
55
|
+
([#1](https://github.com/chocolateboy/string_splitter/issues/1))
|
4
56
|
|
5
57
|
## 0.3.0 - 2018-06-23
|
6
58
|
|
7
|
-
|
8
|
-
|
9
|
-
|
59
|
+
#### Breaking Changes
|
60
|
+
|
61
|
+
- rename the `default_separator` option `default_delimiter`
|
10
62
|
|
11
63
|
## 0.2.0 - 2018-06-22
|
12
64
|
|
13
|
-
|
14
|
-
|
65
|
+
#### Breaking Changes
|
66
|
+
|
67
|
+
- make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
|
68
|
+
1-based accessor
|
15
69
|
|
16
70
|
## 0.1.0 - 2018-06-22
|
17
71
|
|
18
|
-
|
19
|
-
|
72
|
+
#### Breaking Changes
|
73
|
+
|
74
|
+
- the block now takes a single `split` object with an `index` accessor, rather
|
75
|
+
than separate `index` and `split` arguments
|
76
|
+
|
77
|
+
#### Features
|
78
|
+
|
20
79
|
- add support for negative indices in the value supplied to the `at` option
|
21
80
|
- add a `count` field to the split object containing the total number of splits
|
22
81
|
|
data/README.md
CHANGED
@@ -3,14 +3,16 @@
|
|
3
3
|
[![Build Status](https://travis-ci.org/chocolateboy/string_splitter.svg)](https://travis-ci.org/chocolateboy/string_splitter)
|
4
4
|
[![Gem Version](https://img.shields.io/gem/v/string_splitter.svg)](https://rubygems.org/gems/string_splitter)
|
5
5
|
|
6
|
-
<!--
|
7
|
-
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
6
|
+
<!-- toc -->
|
8
7
|
|
9
8
|
- [NAME](#name)
|
10
9
|
- [INSTALLATION](#installation)
|
11
10
|
- [SYNOPSIS](#synopsis)
|
12
11
|
- [DESCRIPTION](#description)
|
13
12
|
- [WHY?](#why)
|
13
|
+
- [CAVEATS](#caveats)
|
14
|
+
- [Differences from String#split](#differences-from-string%23split)
|
15
|
+
- [COMPATIBILITY](#compatibility)
|
14
16
|
- [VERSION](#version)
|
15
17
|
- [SEE ALSO](#see-also)
|
16
18
|
- [Gems](#gems)
|
@@ -18,7 +20,7 @@
|
|
18
20
|
- [AUTHOR](#author)
|
19
21
|
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
20
22
|
|
21
|
-
<!--
|
23
|
+
<!-- tocstop -->
|
22
24
|
|
23
25
|
# NAME
|
24
26
|
|
@@ -36,65 +38,128 @@ gem "string_splitter"
|
|
36
38
|
require "string_splitter"
|
37
39
|
|
38
40
|
ss = StringSplitter.new
|
41
|
+
```
|
42
|
+
|
43
|
+
**Same as `String#split`**
|
39
44
|
|
40
|
-
|
41
|
-
ss.split("foo bar baz
|
42
|
-
ss.split("foo bar baz
|
43
|
-
ss.split("foo bar baz
|
44
|
-
# => ["foo", "bar", "baz"
|
45
|
+
```ruby
|
46
|
+
ss.split("foo bar baz")
|
47
|
+
ss.split("foo bar baz", " ")
|
48
|
+
ss.split("foo bar baz", /\s+/)
|
49
|
+
# => ["foo", "bar", "baz"]
|
50
|
+
|
51
|
+
ss.split("foo", "")
|
52
|
+
ss.split("foo", //)
|
53
|
+
# => ["f", "o", "o"]
|
54
|
+
|
55
|
+
ss.split("", "...")
|
56
|
+
ss.split("", /.../)
|
57
|
+
# => []
|
58
|
+
```
|
45
59
|
|
46
|
-
|
60
|
+
**Split at the first delimiter**
|
61
|
+
|
62
|
+
```ruby
|
47
63
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
64
|
+
ss.split("foo:bar:baz:quux", ":", select: 1)
|
48
65
|
# => ["foo", "bar:baz:quux"]
|
66
|
+
```
|
49
67
|
|
50
|
-
|
68
|
+
**Split at the last delimiter**
|
69
|
+
|
70
|
+
```ruby
|
51
71
|
ss.split("foo:bar:baz:quux", ":", at: -1)
|
52
72
|
# => ["foo:bar:baz", "quux"]
|
73
|
+
```
|
74
|
+
|
75
|
+
**Split at multiple delimiter positions**
|
76
|
+
|
77
|
+
```ruby
|
78
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
79
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
80
|
+
```
|
53
81
|
|
54
|
-
|
55
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
56
|
-
# => ["1", "2", "3", "4:5:6:7", "8:9"]
|
82
|
+
**Split at all but the first and last delimiters**
|
57
83
|
|
58
|
-
|
84
|
+
```ruby
|
85
|
+
ss.split("1:2:3:4:5:6", ":", except: [1, -1])
|
86
|
+
ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
|
87
|
+
# => ["1:2", "3", "4", "5:6"]
|
88
|
+
```
|
89
|
+
|
90
|
+
**Split from the right**
|
91
|
+
|
92
|
+
```ruby
|
59
93
|
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
60
94
|
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
95
|
+
```
|
96
|
+
|
97
|
+
**Split with negative, descending, and infinite ranges**
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
101
|
+
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
102
|
+
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
105
|
+
|
106
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
107
|
+
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
108
|
+
```
|
109
|
+
|
110
|
+
**Full control via a block**
|
61
111
|
|
62
|
-
|
63
|
-
result = ss.split(
|
64
|
-
split.
|
112
|
+
```ruby
|
113
|
+
result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
|
114
|
+
split.pos % 2 == 0
|
65
115
|
end
|
66
|
-
# => ["
|
116
|
+
# => ["1:2", "3:4", "5:6", "7:8"]
|
117
|
+
```
|
118
|
+
|
119
|
+
```ruby
|
120
|
+
string = "banana".chars.sort.join # "aaabnn"
|
121
|
+
|
122
|
+
ss.split(string, "") do |split|
|
123
|
+
split.rhs != split.lhs
|
124
|
+
end
|
125
|
+
# => ["aaa", "b", "nn"]
|
67
126
|
```
|
68
127
|
|
69
128
|
# DESCRIPTION
|
70
129
|
|
71
|
-
Many languages have built-in
|
72
|
-
(notwithstanding the occasional
|
73
|
-
and
|
130
|
+
Many languages have built-in `split` functions/methods for strings. They behave
|
131
|
+
similarly (notwithstanding the occasional
|
132
|
+
[surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
|
133
|
+
handle a few common cases e.g.:
|
74
134
|
|
75
135
|
* limiting the number of splits
|
76
|
-
* including the
|
136
|
+
* including the separator(s) in the results
|
77
137
|
* removing (some) empty fields
|
78
138
|
|
79
|
-
But, because the API is squeezed into two overloaded parameters (the delimiter
|
80
|
-
achieving the desired
|
81
|
-
trailing fields (by default), it provides no
|
82
|
-
|
83
|
-
to
|
139
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter
|
140
|
+
and the limit), achieving the desired results can be tricky. For instance,
|
141
|
+
while `String#split` removes empty trailing fields (by default), it provides no
|
142
|
+
way to remove *all* empty fields. Likewise, the cramped API means there's no
|
143
|
+
way to e.g. combine a limit (positive integer) with the option to preserve
|
144
|
+
empty fields (negative integer), or use backreferences in a delimiter pattern
|
84
145
|
without including its captured subexpressions in the result.
|
85
146
|
|
86
|
-
If `split` was being written from scratch, without the baggage of its legacy
|
87
|
-
it's possible that some of these options would be made explicit rather
|
88
|
-
the parameters. And, indeed, this is possible in some
|
89
|
-
e.g. in Crystal:
|
147
|
+
If `split` was being written from scratch, without the baggage of its legacy
|
148
|
+
API, it's possible that some of these options would be made explicit rather
|
149
|
+
than overloading the parameters. And, indeed, this is possible in some
|
150
|
+
implementations, e.g. in Crystal:
|
90
151
|
|
91
152
|
```ruby
|
92
|
-
":foo:bar:baz:".split(":", remove_empty: false)
|
93
|
-
|
153
|
+
":foo:bar:baz:".split(":", remove_empty: false)
|
154
|
+
# => ["", "foo", "bar", "baz", ""]
|
155
|
+
|
156
|
+
":foo:bar:baz:".split(":", remove_empty: true)
|
157
|
+
# => ["foo", "bar", "baz"]
|
94
158
|
````
|
95
159
|
|
96
|
-
StringSplitter takes this one step further by moving the configuration out of
|
97
|
-
and delegating the strategy — i.e. which splits should be
|
160
|
+
StringSplitter takes this one step further by moving the configuration out of
|
161
|
+
the method altogether and delegating the strategy — i.e. which splits should be
|
162
|
+
accepted or rejected — to a block:
|
98
163
|
|
99
164
|
```ruby
|
100
165
|
ss = StringSplitter.new
|
@@ -102,22 +167,32 @@ ss = StringSplitter.new
|
|
102
167
|
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
103
168
|
# => ["foo", "bar:baz"]
|
104
169
|
|
105
|
-
ss.split("foo:bar:baz", ":")
|
106
|
-
|
170
|
+
ss.split("foo:bar:baz:quux", ":") do |split|
|
171
|
+
split.position == 1 || split.position == 3
|
172
|
+
end
|
173
|
+
# => ["foo", "bar:baz", "quux"]
|
107
174
|
```
|
108
175
|
|
109
|
-
As a shortcut, the common case of splitting
|
176
|
+
As a shortcut, the common case of splitting (or not splitting) at one or more
|
177
|
+
positions is supported by dedicated options:
|
110
178
|
|
111
179
|
```ruby
|
112
|
-
ss.split(
|
180
|
+
ss.split("foo:bar:baz:quux", ":", select: [1, -1])
|
181
|
+
# => ["foo", "bar:baz", "quux"]
|
182
|
+
|
183
|
+
ss.split("foo:bar:baz:quux", ":", reject: [1, -1])
|
184
|
+
# => ["foo:bar", "baz:quux"]
|
113
185
|
```
|
114
186
|
|
115
187
|
# WHY?
|
116
188
|
|
117
|
-
I wanted to split semi-structured output into fields without having to resort
|
189
|
+
I wanted to split semi-structured output into fields without having to resort
|
190
|
+
to a regex or a full-blown parser.
|
118
191
|
|
119
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
120
|
-
that's tantalizingly close to being
|
192
|
+
As an example, the nominally unstructured output of many Unix commands is often
|
193
|
+
formatted in a way that's tantalizingly close to being
|
194
|
+
[machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
|
195
|
+
apart from a few pesky exceptions e.g.:
|
121
196
|
|
122
197
|
```bash
|
123
198
|
$ ls -l
|
@@ -129,8 +204,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
129
204
|
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
130
205
|
```
|
131
206
|
|
132
|
-
These lines can *almost* be parsed into an array of fields by splitting them on
|
133
|
-
date (columns 6-8) i.e.:
|
207
|
+
These lines can *almost* be parsed into an array of fields by splitting them on
|
208
|
+
whitespace. The exception is the date (columns 6-8) i.e.:
|
134
209
|
|
135
210
|
```ruby
|
136
211
|
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
@@ -155,13 +230,14 @@ One way to work around this is to parse the whole line e.g.:
|
|
155
230
|
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
156
231
|
```
|
157
232
|
|
158
|
-
But that requires us to specify *everything*. What we really want is a version
|
159
|
-
which allows us to veto splitting for the 6th and 7th delimiters
|
160
|
-
|
161
|
-
|
233
|
+
But that requires us to specify *everything*. What we really want is a version
|
234
|
+
of `split` which allows us to veto splitting for the 6th and 7th delimiters
|
235
|
+
(and to stop after the 8th delimiter) i.e. control over which splits are
|
236
|
+
accepted, rather than being restricted to the single, baked-in strategy
|
237
|
+
provided by the `limit` parameter.
|
162
238
|
|
163
|
-
By providing a simple way to accept or reject each split, StringSplitter makes
|
164
|
-
this easy to handle, either via a block:
|
239
|
+
By providing a simple way to accept or reject each split, StringSplitter makes
|
240
|
+
cases like this easy to handle, either via a block:
|
165
241
|
|
166
242
|
```ruby
|
167
243
|
ss.split(line) do |split|
|
@@ -177,9 +253,51 @@ ss.split(line, at: [1..5, 8])
|
|
177
253
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
178
254
|
```
|
179
255
|
|
256
|
+
# CAVEATS
|
257
|
+
|
258
|
+
## Differences from String#split
|
259
|
+
|
260
|
+
Unlike `String#split`, StringSplitter doesn't trim the string before splitting
|
261
|
+
(with `String#strip`) if the delimiter is omitted or a single space, e.g.:
|
262
|
+
|
263
|
+
```ruby
|
264
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
265
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
266
|
+
|
267
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
268
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
269
|
+
```
|
270
|
+
|
271
|
+
`String#split` omits the `nil` values of unmatched optional captures:
|
272
|
+
|
273
|
+
```ruby
|
274
|
+
"foo:bar:baz".scan(/(:)|(-)/) # => [[":", nil], [":", nil]]
|
275
|
+
"foo:bar:baz".split(/(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
276
|
+
```
|
277
|
+
|
278
|
+
StringSplitter preserves them (if `include_captures` is true, as it
|
279
|
+
is by default), though they can be omitted from spread captures by passing
|
280
|
+
`:compact` as the value of the `spread_captures` option:
|
281
|
+
|
282
|
+
```ruby
|
283
|
+
s1 = StringSplitter.new(spread_captures: true)
|
284
|
+
s2 = StringSplitter.new(spread_captures: false)
|
285
|
+
s3 = StringSplitter.new(spread_captures: :compact)
|
286
|
+
|
287
|
+
s1.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", nil, "bar", ":", nil, "baz"]
|
288
|
+
s2.split("foo:bar:baz", /(:)|(-)/) # => ["foo", [":", nil], "bar", [":", nil], "baz"]
|
289
|
+
s3.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
|
290
|
+
```
|
291
|
+
|
292
|
+
# COMPATIBILITY
|
293
|
+
|
294
|
+
StringSplitter is tested and supported on all versions of Ruby [supported by
|
295
|
+
the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
|
296
|
+
currently, Ruby 2.5 and above.
|
297
|
+
|
180
298
|
# VERSION
|
181
299
|
|
182
|
-
0.
|
300
|
+
0.7.0
|
183
301
|
|
184
302
|
# SEE ALSO
|
185
303
|
|
@@ -197,7 +315,7 @@ ss.split(line, at: [1..5, 8])
|
|
197
315
|
|
198
316
|
# COPYRIGHT AND LICENSE
|
199
317
|
|
200
|
-
Copyright © 2018 by chocolateboy.
|
318
|
+
Copyright © 2018-2020 by chocolateboy.
|
201
319
|
|
202
320
|
This is free software; you can redistribute it and/or modify it under the
|
203
|
-
terms of the [Artistic License 2.0](
|
321
|
+
terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
|
data/lib/string_splitter.rb
CHANGED
@@ -1,250 +1,359 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'set'
|
3
4
|
require 'values'
|
4
5
|
|
6
|
+
require_relative 'string_splitter/version'
|
7
|
+
|
5
8
|
# This class extends the functionality of +String#split+ by:
|
6
9
|
#
|
7
10
|
# - providing full control over which splits are accepted or rejected
|
11
|
+
#
|
8
12
|
# - adding support for splitting from right-to-left
|
9
|
-
#
|
10
|
-
#
|
13
|
+
#
|
14
|
+
# - encapsulating splitting options/preferences in the splitter rather
|
15
|
+
# than trying to cram them into overloaded method parameters
|
11
16
|
#
|
12
17
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
13
|
-
# guns e.g. regex matching or parsing.
|
18
|
+
# guns, e.g. regex matching or parsing.
|
19
|
+
#
|
20
|
+
# Implementation-wise, we split the string with a scanner which works in a similar
|
21
|
+
# way to +String#split+ and parse the resulting tokens into an array of Split objects
|
22
|
+
# with the following fields:
|
23
|
+
#
|
24
|
+
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
25
|
+
# - count: the number of splits
|
26
|
+
# - index: the 0-based index of the split in the array
|
27
|
+
# - lhs: the string to the left of the separator (back to the previous split candidate)
|
28
|
+
# - position: the 1-based index of the split in the array (alias: pos)
|
29
|
+
# - rhs: the string to the right of the separator (up to the next split candidate)
|
30
|
+
# - rindex: the 0-based index of the split relative to the end of the array
|
31
|
+
# - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
|
32
|
+
# - separator: the string matched by the delimiter pattern/string
|
33
|
+
#
|
14
34
|
class StringSplitter
|
15
|
-
|
16
|
-
|
17
|
-
|
35
|
+
# terminology: the delimiter is what we provide and the separators are what we get
|
36
|
+
# back (if we capture them). e.g. for:
|
37
|
+
#
|
38
|
+
# ss.split("foo:bar::baz", /(\W+)/)
|
39
|
+
#
|
40
|
+
# the delimiter is /(\W)/ and the separators are ":" and "::"
|
41
|
+
|
42
|
+
ACCEPT_ALL = ->(_split) { true }
|
43
|
+
DEFAULT_DELIMITER = /\s+/.freeze
|
44
|
+
REMOVE = [].freeze
|
18
45
|
|
19
46
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
20
47
|
def position
|
21
48
|
index + 1
|
22
49
|
end
|
23
50
|
|
24
|
-
alias_method :offset, :index
|
25
51
|
alias_method :pos, :position
|
52
|
+
|
53
|
+
# 0-based index relative to the end of the array, e.g. for 5 items:
|
54
|
+
#
|
55
|
+
# index | rindex
|
56
|
+
# ------|-------
|
57
|
+
# 0 | 4
|
58
|
+
# 1 | 3
|
59
|
+
# 2 | 2
|
60
|
+
# 3 | 1
|
61
|
+
# 4 | 0
|
62
|
+
def rindex
|
63
|
+
count - position
|
64
|
+
end
|
65
|
+
|
66
|
+
# 1-based position relative to the end of the array, e.g. for 5 items:
|
67
|
+
#
|
68
|
+
# position | rposition
|
69
|
+
# ----------|----------
|
70
|
+
# 1 | 5
|
71
|
+
# 2 | 4
|
72
|
+
# 3 | 3
|
73
|
+
# 4 | 2
|
74
|
+
# 5 | 1
|
75
|
+
def rposition
|
76
|
+
count + 1 - position
|
77
|
+
end
|
78
|
+
|
79
|
+
alias_method :rpos, :rposition
|
80
|
+
end
|
81
|
+
|
82
|
+
# simulate an enum. the value is returned by the case statement
|
83
|
+
# in the generated block if the positions match
|
84
|
+
module Action
|
85
|
+
SELECT = true
|
86
|
+
REJECT = false
|
26
87
|
end
|
27
88
|
|
89
|
+
private_constant :Action
|
90
|
+
|
28
91
|
def initialize(
|
29
92
|
default_delimiter: DEFAULT_DELIMITER,
|
30
93
|
include_captures: true,
|
31
|
-
remove_empty: false,
|
94
|
+
remove_empty: false, # TODO remove this
|
95
|
+
remove_empty_fields: remove_empty,
|
32
96
|
spread_captures: true
|
33
97
|
)
|
34
98
|
@default_delimiter = default_delimiter
|
35
99
|
@include_captures = include_captures
|
36
|
-
@
|
100
|
+
@remove_empty_fields = remove_empty_fields
|
37
101
|
@spread_captures = spread_captures
|
38
102
|
end
|
39
103
|
|
40
|
-
attr_reader
|
41
|
-
|
42
|
-
|
43
|
-
|
104
|
+
attr_reader(
|
105
|
+
:default_delimiter,
|
106
|
+
:include_captures,
|
107
|
+
:remove_empty_fields,
|
108
|
+
:spread_captures
|
109
|
+
)
|
44
110
|
|
45
|
-
|
46
|
-
|
111
|
+
# TODO remove this
|
112
|
+
alias remove_empty remove_empty_fields
|
113
|
+
|
114
|
+
def split(
|
115
|
+
string,
|
116
|
+
delimiter = @default_delimiter,
|
117
|
+
at: nil, # alias for select
|
118
|
+
except: nil, # alias for reject
|
119
|
+
select: at,
|
120
|
+
reject: except,
|
121
|
+
&block
|
122
|
+
)
|
123
|
+
result, splits, count, accept = init(
|
124
|
+
string: string,
|
125
|
+
delimiter: delimiter,
|
126
|
+
select: select,
|
127
|
+
reject: reject,
|
128
|
+
block: block
|
129
|
+
)
|
130
|
+
|
131
|
+
return result unless splits
|
132
|
+
|
133
|
+
splits.each_with_index do |hash, index|
|
134
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
47
135
|
result << split.lhs if result.empty?
|
48
136
|
|
49
|
-
if
|
50
|
-
|
51
|
-
if @spread_captures
|
52
|
-
result += split.captures
|
53
|
-
else
|
54
|
-
result << split.captures
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
result << split.rhs
|
137
|
+
if accept.call(split)
|
138
|
+
result << split.captures << split.rhs
|
59
139
|
else
|
60
140
|
# append the rhs
|
61
141
|
result[-1] = result[-1] + split.separator + split.rhs
|
62
142
|
end
|
63
143
|
end
|
64
144
|
|
65
|
-
result
|
145
|
+
render(result)
|
66
146
|
end
|
67
147
|
|
68
148
|
alias lsplit split
|
69
149
|
|
70
|
-
def rsplit(
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
150
|
+
def rsplit(
|
151
|
+
string,
|
152
|
+
delimiter = @default_delimiter,
|
153
|
+
at: nil, # alias for select
|
154
|
+
except: nil, # alias for reject
|
155
|
+
select: at,
|
156
|
+
reject: except,
|
157
|
+
&block
|
158
|
+
)
|
159
|
+
result, splits, count, accept = init(
|
160
|
+
string: string,
|
161
|
+
delimiter: delimiter,
|
162
|
+
select: select,
|
163
|
+
reject: reject,
|
164
|
+
block: block
|
165
|
+
)
|
166
|
+
|
167
|
+
return result unless splits
|
168
|
+
|
169
|
+
splits.reverse_each.with_index do |hash, index|
|
170
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
75
171
|
result.unshift(split.rhs) if result.empty?
|
76
172
|
|
77
|
-
if
|
78
|
-
|
79
|
-
|
80
|
-
result = split.captures + result
|
81
|
-
else
|
82
|
-
result.unshift(split.captures)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
result.unshift(split.lhs)
|
173
|
+
if accept.call(split)
|
174
|
+
# [lhs + captures] + result
|
175
|
+
result.unshift(split.lhs, split.captures)
|
87
176
|
else
|
88
177
|
# prepend the lhs
|
89
178
|
result[0] = split.lhs + split.separator + result[0]
|
90
179
|
end
|
91
180
|
end
|
92
181
|
|
93
|
-
result
|
182
|
+
render(result)
|
94
183
|
end
|
95
184
|
|
96
185
|
private
|
97
186
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
187
|
+
# initialisation common to +split+ and +rsplit+
|
188
|
+
#
|
189
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns a tuple with
|
190
|
+
# the following fields:
|
191
|
+
#
|
192
|
+
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
193
|
+
# if the splits array is empty, the caller returns this array immediately
|
194
|
+
# without any further processing
|
195
|
+
#
|
196
|
+
# - splits: an array of hashes containing the lhs, rhs, separator and captured
|
197
|
+
# separator substrings for each split
|
198
|
+
#
|
199
|
+
# - count: the number of splits
|
200
|
+
#
|
201
|
+
# - accept: a proc whose return value determines whether each split should be
|
202
|
+
# accepted (true) or rejected (false)
|
203
|
+
#
|
204
|
+
def init(string:, delimiter:, select:, reject:, block:)
|
205
|
+
if reject
|
206
|
+
positions = reject
|
207
|
+
action = Action::REJECT
|
208
|
+
elsif select
|
209
|
+
positions = select
|
210
|
+
action = Action::SELECT
|
211
|
+
end
|
117
212
|
|
118
|
-
|
119
|
-
end
|
213
|
+
splits = parse(string, delimiter)
|
120
214
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
separator: separator,
|
125
|
-
captures: captures,
|
126
|
-
}
|
215
|
+
if splits.empty?
|
216
|
+
result = string.empty? ? [] : [string]
|
217
|
+
return [result]
|
127
218
|
end
|
128
219
|
|
129
|
-
|
220
|
+
block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
|
221
|
+
[[], splits, splits.length, block]
|
130
222
|
end
|
131
223
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
224
|
+
def render(values)
|
225
|
+
values.flat_map do |value|
|
226
|
+
if value.is_a?(String)
|
227
|
+
value.empty? && @remove_empty_fields ? REMOVE : [value]
|
228
|
+
elsif @include_captures
|
229
|
+
if @spread_captures
|
230
|
+
@spread_captures == :compact ? value.compact : value
|
231
|
+
elsif value.empty?
|
232
|
+
# we expose non-captures (string delimiters or regexps with no
|
233
|
+
# captures) as empty arrays inside the block, so the type is
|
234
|
+
# consistent, but it doesn't make sense to keep them in the
|
235
|
+
# result
|
236
|
+
REMOVE
|
237
|
+
else
|
238
|
+
[value]
|
239
|
+
end
|
240
|
+
else
|
241
|
+
REMOVE
|
242
|
+
end
|
137
243
|
end
|
138
|
-
|
139
|
-
ncaptures = match.captures.length
|
140
|
-
delimiter = increment_backrefs(delimiter, ncaptures)
|
141
|
-
parts = string.split(/(#{delimiter})/, -1)
|
142
|
-
remove_trailing_empty_field!(parts, ncaptures)
|
143
|
-
result, splits = splits_for(parts, ncaptures)
|
144
|
-
count = splits.length
|
145
|
-
block ||= at ? match_positions(at, count) : ACCEPT
|
146
|
-
|
147
|
-
[result, block, splits, count, -1]
|
148
244
|
end
|
149
245
|
|
150
|
-
#
|
151
|
-
#
|
152
|
-
#
|
153
|
-
# e.g. to split on:
|
246
|
+
# takes a string and a delimiter pattern (regex or string) and splits it along
|
247
|
+
# the delimiter, returning an array of objects (hashes) representing each split.
|
248
|
+
# e.g. for:
|
154
249
|
#
|
155
|
-
#
|
156
|
-
# - <bar-comment> ... </bar-comment>
|
250
|
+
# parse("foo:bar:baz:quux", ":")
|
157
251
|
#
|
158
|
-
#
|
252
|
+
# we return:
|
159
253
|
#
|
160
|
-
#
|
254
|
+
# [
|
255
|
+
# { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
|
256
|
+
# { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
|
257
|
+
# { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
|
258
|
+
# ]
|
161
259
|
#
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
#
|
166
|
-
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
260
|
+
def parse(string, delimiter)
|
261
|
+
result = []
|
262
|
+
start = 0
|
167
263
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
264
|
+
# we don't use the argument passed to the +scan+ block here because it's a
|
265
|
+
# string (the separator) if there are no captures, rather than an empty
|
266
|
+
# array. we use match.captures instead to get the array
|
267
|
+
string.scan(delimiter) do
|
268
|
+
match = Regexp.last_match
|
269
|
+
index, after = match.offset(0)
|
270
|
+
separator = match[0]
|
271
|
+
|
272
|
+
# ignore empty separators at the beginning and/or end of the string
|
273
|
+
next if separator.empty? && (index.zero? || after == string.length)
|
274
|
+
|
275
|
+
lhs = string.slice(start, index - start)
|
276
|
+
result.last[:rhs] = lhs unless result.empty?
|
277
|
+
|
278
|
+
# this is correct for the last/only match, but gets updated to the next
|
279
|
+
# match's lhs for other matches
|
280
|
+
rhs = match.post_match
|
281
|
+
|
282
|
+
result << {
|
283
|
+
captures: match.captures,
|
284
|
+
lhs: lhs,
|
285
|
+
rhs: rhs,
|
286
|
+
separator: separator,
|
287
|
+
}
|
288
|
+
|
289
|
+
# move the start index (the start of the next lhs) to the index after the
|
290
|
+
# last character of the separator
|
291
|
+
start = after
|
174
292
|
end
|
175
293
|
|
176
|
-
|
294
|
+
result
|
177
295
|
end
|
178
296
|
|
179
|
-
#
|
180
|
-
# on
|
297
|
+
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
298
|
+
# on the action) the supplied positions
|
181
299
|
#
|
182
|
-
#
|
183
|
-
#
|
184
|
-
# # => ["f", "o", "o", "b", "a", "r", ""]
|
300
|
+
# positions are preprocessed to support additional features: negative
|
301
|
+
# ranges, infinite ranges, and descending ranges, e.g.:
|
185
302
|
#
|
186
|
-
#
|
187
|
-
# # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
|
303
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
188
304
|
#
|
189
|
-
#
|
190
|
-
# # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
|
305
|
+
# translates to:
|
191
306
|
#
|
192
|
-
#
|
193
|
-
#
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
#
|
307
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
308
|
+
#
|
309
|
+
# and
|
310
|
+
#
|
311
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
|
312
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..-1)
|
313
|
+
#
|
314
|
+
# translate to:
|
315
|
+
#
|
316
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
|
317
|
+
#
|
318
|
+
def compile(positions, action, count)
|
319
|
+
# XXX note: we don't use modulo, because we don't want
|
320
|
+
# out-of-bounds indices to silently work, e.g. we don't want:
|
205
321
|
#
|
206
|
-
#
|
322
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
207
323
|
#
|
208
|
-
#
|
209
|
-
# + ncaptures
|
210
|
-
# + 1 (separator)
|
324
|
+
# to mysteriously match when the index/position is 0/1
|
211
325
|
#
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
# to mysteriously match when the position is 2
|
239
|
-
|
240
|
-
nsplits + 1 + position
|
326
|
+
resolve = ->(int) { int.negative? ? count + 1 + int : int }
|
327
|
+
|
328
|
+
# don't use Array(...) to wrap these as we don't want to convert ranges
|
329
|
+
positions = positions.is_a?(Array) ? positions : [positions]
|
330
|
+
|
331
|
+
positions = positions.map do |position|
|
332
|
+
if position.is_a?(Integer)
|
333
|
+
resolve[position]
|
334
|
+
elsif position.is_a?(Range)
|
335
|
+
rbegin = position.begin
|
336
|
+
rend = position.end
|
337
|
+
rexc = position.exclude_end?
|
338
|
+
|
339
|
+
if rbegin.nil?
|
340
|
+
Range.new(1, resolve[rend], rexc)
|
341
|
+
elsif rend.nil?
|
342
|
+
Range.new(resolve[rbegin], count, rexc)
|
343
|
+
elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
|
344
|
+
from = resolve[rbegin]
|
345
|
+
to = resolve[rend]
|
346
|
+
to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
|
347
|
+
else
|
348
|
+
position
|
349
|
+
end
|
350
|
+
elsif position.is_a?(Set)
|
351
|
+
position.map { |it| resolve[it] }.to_set
|
241
352
|
else
|
242
353
|
position
|
243
354
|
end
|
244
355
|
end
|
245
356
|
|
246
|
-
|
247
|
-
case split.position when *positions then true else false end
|
248
|
-
end
|
357
|
+
->(split) { case split.position when *positions then action else !action end }
|
249
358
|
end
|
250
359
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|
@@ -30,42 +30,42 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1
|
33
|
+
version: '2.1'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '5.
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '5.
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest-power_assert
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.3
|
61
|
+
version: '0.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.3
|
68
|
+
version: '0.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest-reporters
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,29 +86,15 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '13.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
-
|
98
|
-
name: rubocop
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: 0.54.0
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: 0.54.0
|
111
|
-
description:
|
96
|
+
version: '13.0'
|
97
|
+
description:
|
112
98
|
email: chocolate@cpan.org
|
113
99
|
executables: []
|
114
100
|
extensions: []
|
@@ -127,7 +113,7 @@ metadata:
|
|
127
113
|
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
128
114
|
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
129
115
|
source_code_uri: https://github.com/chocolateboy/string_splitter
|
130
|
-
post_install_message:
|
116
|
+
post_install_message:
|
131
117
|
rdoc_options: []
|
132
118
|
require_paths:
|
133
119
|
- lib
|
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
121
|
requirements:
|
136
122
|
- - ">="
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '2.3'
|
139
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
126
|
requirements:
|
141
127
|
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
129
|
version: '0'
|
144
130
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
131
|
+
rubygems_version: 3.1.4
|
132
|
+
signing_key:
|
148
133
|
specification_version: 4
|
149
134
|
summary: String#split on steroids
|
150
135
|
test_files: []
|