string_splitter 0.3.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -7
- data/README.md +168 -53
- data/lib/string_splitter.rb +281 -131
- data/lib/string_splitter/version.rb +1 -1
- metadata +16 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d97ccb956fe51694359cdb0d3a997d6574de088bac6ed5a8e572f92bb5ed54a
|
4
|
+
data.tar.gz: 845cefeb5efd5d01baa45759cb05ff7ae5e9a457c1f148b340bb24c038bd259e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a935a6e0f3434801dcae6a32575779e1d2eb706f8f208087a208e7fdba39ac5b49928f8b7617aec60493a8db5988a013028650f8b2ced01fadb620bfd4c77e5
|
7
|
+
data.tar.gz: d76c18a283c1e113c8bffb73b813eb6074481faa7ea339811dc9a7424a5e24fdc3efbe9afa941459e566cde8271c3cd19a97e3a37a8cf90d36a65a7bf8fd6dcf
|
data/CHANGELOG.md
CHANGED
@@ -1,18 +1,74 @@
|
|
1
|
+
## 0.6.0 - 2020-08-20
|
2
|
+
|
3
|
+
#### Breaking Changes
|
4
|
+
|
5
|
+
- `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
|
6
|
+
unlike Ruby's `String#split` (but like Crystal's), the former no longer
|
7
|
+
strips the string before splitting
|
8
|
+
- rename the `remove_empty` option `remove_empty_fields`
|
9
|
+
- rename the `exclude` option `except` (alias for `reject`)
|
10
|
+
|
11
|
+
#### Fixes
|
12
|
+
|
13
|
+
- correctly handle backreferences in delimiter patterns
|
14
|
+
|
15
|
+
#### Features
|
16
|
+
|
17
|
+
- add support for descending, negative, and infinite ranges,
|
18
|
+
e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
|
19
|
+
|
20
|
+
## 0.5.1 - 2018-07-01
|
21
|
+
|
22
|
+
#### Changes
|
23
|
+
|
24
|
+
- set StringSplitter::VERSION when `string_splitter.rb` is loaded
|
25
|
+
|
26
|
+
## 0.5.0 - 2018-06-26
|
27
|
+
|
28
|
+
#### Fixes
|
29
|
+
|
30
|
+
- don't treat string delimiters as patterns
|
31
|
+
|
32
|
+
#### Features
|
33
|
+
|
34
|
+
- add a `reject`/`exclude` option which rejects splits at the specified positions
|
35
|
+
- add a `select` alias for `at`
|
36
|
+
|
37
|
+
## 0.4.0 - 2018-06-24
|
38
|
+
|
39
|
+
#### Breaking Changes
|
40
|
+
|
41
|
+
- remove the `offset` alias for `split.index`
|
42
|
+
|
43
|
+
## 0.3.1 - 2018-06-24
|
44
|
+
|
45
|
+
#### Fixes
|
46
|
+
|
47
|
+
- remove trailing empty field when the separator is empty
|
48
|
+
([#1](https://github.com/chocolateboy/string_splitter/issues/1))
|
49
|
+
|
1
50
|
## 0.3.0 - 2018-06-23
|
2
51
|
|
3
|
-
|
4
|
-
|
5
|
-
|
52
|
+
#### Breaking Changes
|
53
|
+
|
54
|
+
- rename the `default_separator` option `default_delimiter`
|
6
55
|
|
7
56
|
## 0.2.0 - 2018-06-22
|
8
57
|
|
9
|
-
|
10
|
-
|
58
|
+
#### Breaking Changes
|
59
|
+
|
60
|
+
- make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
|
61
|
+
1-based accessor
|
11
62
|
|
12
63
|
## 0.1.0 - 2018-06-22
|
13
64
|
|
14
|
-
|
15
|
-
|
65
|
+
#### Breaking Changes
|
66
|
+
|
67
|
+
- the block now takes a single `split` object with an `index` accessor, rather
|
68
|
+
than separate `index` and `split` arguments
|
69
|
+
|
70
|
+
#### Features
|
71
|
+
|
16
72
|
- add support for negative indices in the value supplied to the `at` option
|
17
73
|
- add a `count` field to the split object containing the total number of splits
|
18
74
|
|
data/README.md
CHANGED
@@ -3,14 +3,16 @@
|
|
3
3
|
[![Build Status](https://travis-ci.org/chocolateboy/string_splitter.svg)](https://travis-ci.org/chocolateboy/string_splitter)
|
4
4
|
[![Gem Version](https://img.shields.io/gem/v/string_splitter.svg)](https://rubygems.org/gems/string_splitter)
|
5
5
|
|
6
|
-
<!--
|
7
|
-
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
6
|
+
<!-- toc -->
|
8
7
|
|
9
8
|
- [NAME](#name)
|
10
9
|
- [INSTALLATION](#installation)
|
11
10
|
- [SYNOPSIS](#synopsis)
|
12
11
|
- [DESCRIPTION](#description)
|
13
12
|
- [WHY?](#why)
|
13
|
+
- [CAVEATS](#caveats)
|
14
|
+
- [Differences from String#split](#differences-from-string%23split)
|
15
|
+
- [COMPATIBILITY](#compatibility)
|
14
16
|
- [VERSION](#version)
|
15
17
|
- [SEE ALSO](#see-also)
|
16
18
|
- [Gems](#gems)
|
@@ -18,7 +20,7 @@
|
|
18
20
|
- [AUTHOR](#author)
|
19
21
|
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
20
22
|
|
21
|
-
<!--
|
23
|
+
<!-- tocstop -->
|
22
24
|
|
23
25
|
# NAME
|
24
26
|
|
@@ -36,65 +38,137 @@ gem "string_splitter"
|
|
36
38
|
require "string_splitter"
|
37
39
|
|
38
40
|
ss = StringSplitter.new
|
41
|
+
```
|
42
|
+
|
43
|
+
**Same as `String#split`**
|
44
|
+
|
45
|
+
```ruby
|
46
|
+
ss.split("foo bar baz")
|
47
|
+
ss.split(" foo bar baz ")
|
48
|
+
# => ["foo", "bar", "baz"]
|
49
|
+
```
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
ss.split("foo", "")
|
53
|
+
ss.split("foo", //)
|
54
|
+
# => ["f", "o", "o"]
|
55
|
+
```
|
39
56
|
|
40
|
-
|
41
|
-
ss.split("
|
42
|
-
ss.split("
|
43
|
-
|
44
|
-
|
57
|
+
```ruby
|
58
|
+
ss.split("", "...")
|
59
|
+
ss.split("", /.../)
|
60
|
+
# => []
|
61
|
+
```
|
45
62
|
|
46
|
-
|
63
|
+
**Split at the first delimiter**
|
64
|
+
|
65
|
+
```ruby
|
47
66
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
67
|
+
ss.split("foo:bar:baz:quux", ":", select: 1)
|
48
68
|
# => ["foo", "bar:baz:quux"]
|
69
|
+
```
|
70
|
+
|
71
|
+
**Split at the last delimiter**
|
49
72
|
|
50
|
-
|
73
|
+
```ruby
|
51
74
|
ss.split("foo:bar:baz:quux", ":", at: -1)
|
52
75
|
# => ["foo:bar:baz", "quux"]
|
76
|
+
```
|
53
77
|
|
54
|
-
|
55
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
56
|
-
# => ["1", "2", "3", "4:5:6:7", "8:9"]
|
78
|
+
**Split at multiple delimiter positions**
|
57
79
|
|
58
|
-
|
80
|
+
```ruby
|
81
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
82
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
83
|
+
```
|
84
|
+
|
85
|
+
**Split at all but the first and last delimiters**
|
86
|
+
|
87
|
+
```ruby
|
88
|
+
ss.split("1:2:3:4:5:6", ":", except: [1, -1])
|
89
|
+
ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
|
90
|
+
# => ["1:2", "3", "4", "5:6"]
|
91
|
+
```
|
92
|
+
|
93
|
+
**Split from the right**
|
94
|
+
|
95
|
+
```ruby
|
59
96
|
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
60
97
|
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
98
|
+
```
|
61
99
|
|
62
|
-
|
63
|
-
|
64
|
-
|
100
|
+
**Split with negative, descending, and infinite ranges**
|
101
|
+
|
102
|
+
```ruby
|
103
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
|
104
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [4...])
|
105
|
+
# => ["1:2:3:4", "5", "6", "7", "8:9"]
|
106
|
+
```
|
107
|
+
|
108
|
+
```ruby
|
109
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
|
110
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [..-3])
|
111
|
+
# => ["1", "2", "3", "4", "5", "6", "7:8:9"]
|
112
|
+
```
|
113
|
+
|
114
|
+
```ruby
|
115
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
|
116
|
+
# => ["1", "2:3", "4", "5", "6:7", "8", "9"]
|
117
|
+
```
|
118
|
+
|
119
|
+
**Full control via a block**
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
|
123
|
+
split.pos % 2 == 0
|
65
124
|
end
|
66
|
-
# => ["
|
125
|
+
# => ["1:2", "3:4", "5:6", "7:8"]
|
126
|
+
```
|
127
|
+
|
128
|
+
```ruby
|
129
|
+
string = "banana".chars.sort.join # "aaabnn"
|
130
|
+
|
131
|
+
ss.split(string, "") do |split|
|
132
|
+
split.rhs != split.lhs
|
133
|
+
end
|
134
|
+
# => ["aaa", "b", "nn"]
|
67
135
|
```
|
68
136
|
|
69
137
|
# DESCRIPTION
|
70
138
|
|
71
|
-
Many languages have built-in
|
72
|
-
(notwithstanding the occasional
|
73
|
-
and
|
139
|
+
Many languages have built-in `split` functions/methods for strings. They behave
|
140
|
+
similarly (notwithstanding the occasional
|
141
|
+
[surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
|
142
|
+
handle a few common cases e.g.:
|
74
143
|
|
75
144
|
* limiting the number of splits
|
76
|
-
* including the
|
145
|
+
* including the separator(s) in the results
|
77
146
|
* removing (some) empty fields
|
78
147
|
|
79
|
-
But, because the API is squeezed into two overloaded parameters (the delimiter
|
80
|
-
achieving the desired
|
81
|
-
trailing fields (by default), it provides no
|
82
|
-
|
83
|
-
to
|
148
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter
|
149
|
+
and the limit), achieving the desired results can be tricky. For instance,
|
150
|
+
while `String#split` removes empty trailing fields (by default), it provides no
|
151
|
+
way to remove *all* empty fields. Likewise, the cramped API means there's no
|
152
|
+
way to e.g. combine a limit (positive integer) with the option to preserve
|
153
|
+
empty fields (negative integer), or use backreferences in a delimiter pattern
|
84
154
|
without including its captured subexpressions in the result.
|
85
155
|
|
86
|
-
If `split` was being written from scratch, without the baggage of its legacy
|
87
|
-
it's possible that some of these options would be made explicit rather
|
88
|
-
the parameters. And, indeed, this is possible in some
|
89
|
-
e.g. in Crystal:
|
156
|
+
If `split` was being written from scratch, without the baggage of its legacy
|
157
|
+
API, it's possible that some of these options would be made explicit rather
|
158
|
+
than overloading the parameters. And, indeed, this is possible in some
|
159
|
+
implementations, e.g. in Crystal:
|
90
160
|
|
91
161
|
```ruby
|
92
|
-
":foo:bar:baz:".split(":", remove_empty: false)
|
93
|
-
|
162
|
+
":foo:bar:baz:".split(":", remove_empty: false)
|
163
|
+
# => ["", "foo", "bar", "baz", ""]
|
164
|
+
|
165
|
+
":foo:bar:baz:".split(":", remove_empty: true)
|
166
|
+
# => ["foo", "bar", "baz"]
|
94
167
|
````
|
95
168
|
|
96
|
-
StringSplitter takes this one step further by moving the configuration out of
|
97
|
-
and delegating the strategy — i.e. which splits should be
|
169
|
+
StringSplitter takes this one step further by moving the configuration out of
|
170
|
+
the method altogether and delegating the strategy — i.e. which splits should be
|
171
|
+
accepted or rejected — to a block:
|
98
172
|
|
99
173
|
```ruby
|
100
174
|
ss = StringSplitter.new
|
@@ -102,22 +176,29 @@ ss = StringSplitter.new
|
|
102
176
|
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
103
177
|
# => ["foo", "bar:baz"]
|
104
178
|
|
105
|
-
ss.split("foo:bar:baz", ":")
|
106
|
-
|
179
|
+
ss.split("foo:bar:baz:quux", ":") do |split|
|
180
|
+
split.position == 1 || split.position == 3
|
181
|
+
end
|
182
|
+
# => ["foo", "bar:baz", "quux"]
|
107
183
|
```
|
108
184
|
|
109
|
-
As a shortcut, the common case of splitting on delimiters at one or more
|
185
|
+
As a shortcut, the common case of splitting on delimiters at one or more
|
186
|
+
positions is supported by an option:
|
110
187
|
|
111
188
|
```ruby
|
112
|
-
ss.split(
|
189
|
+
ss.split("foo:bar:baz:quux", ":", at: [1, -1])
|
190
|
+
# => ["foo", "bar:baz", "quux"]
|
113
191
|
```
|
114
192
|
|
115
193
|
# WHY?
|
116
194
|
|
117
|
-
I wanted to split semi-structured output into fields without having to resort
|
195
|
+
I wanted to split semi-structured output into fields without having to resort
|
196
|
+
to a regex or a full-blown parser.
|
118
197
|
|
119
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
120
|
-
that's tantalizingly close to being
|
198
|
+
As an example, the nominally unstructured output of many Unix commands is often
|
199
|
+
formatted in a way that's tantalizingly close to being
|
200
|
+
[machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
|
201
|
+
apart from a few pesky exceptions e.g.:
|
121
202
|
|
122
203
|
```bash
|
123
204
|
$ ls -l
|
@@ -129,8 +210,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
129
210
|
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
130
211
|
```
|
131
212
|
|
132
|
-
These lines can *almost* be parsed into an array of fields by splitting them on
|
133
|
-
date (columns 6-8) i.e.:
|
213
|
+
These lines can *almost* be parsed into an array of fields by splitting them on
|
214
|
+
whitespace. The exception is the date (columns 6-8) i.e.:
|
134
215
|
|
135
216
|
```ruby
|
136
217
|
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
@@ -155,13 +236,14 @@ One way to work around this is to parse the whole line e.g.:
|
|
155
236
|
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
156
237
|
```
|
157
238
|
|
158
|
-
But that requires us to specify *everything*. What we really want is a version
|
159
|
-
which allows us to veto splitting for the 6th and 7th delimiters
|
160
|
-
|
161
|
-
|
239
|
+
But that requires us to specify *everything*. What we really want is a version
|
240
|
+
of `split` which allows us to veto splitting for the 6th and 7th delimiters
|
241
|
+
(and to stop after the 8th delimiter) i.e. control over which splits are
|
242
|
+
accepted, rather than being restricted to the single, baked-in strategy
|
243
|
+
provided by the `limit` parameter.
|
162
244
|
|
163
|
-
By providing a simple way to accept or reject each split, StringSplitter makes
|
164
|
-
this easy to handle, either via a block:
|
245
|
+
By providing a simple way to accept or reject each split, StringSplitter makes
|
246
|
+
cases like this easy to handle, either via a block:
|
165
247
|
|
166
248
|
```ruby
|
167
249
|
ss.split(line) do |split|
|
@@ -177,9 +259,42 @@ ss.split(line, at: [1..5, 8])
|
|
177
259
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
178
260
|
```
|
179
261
|
|
262
|
+
# CAVEATS
|
263
|
+
|
264
|
+
## Differences from String#split
|
265
|
+
|
266
|
+
StringSplitter shares `String#split`'s behavior of trimming the string before
|
267
|
+
splitting if the delimiter is omitted, e.g.:
|
268
|
+
|
269
|
+
```ruby
|
270
|
+
" foo bar baz ".split # => ["foo", "bar", "baz"]
|
271
|
+
ss.split(" foo bar baz ") # => ["foo", "bar", "baz"]
|
272
|
+
```
|
273
|
+
|
274
|
+
However, unlike `String#split`, this doesn't also apply if a delimiter of `" "`
|
275
|
+
is supplied, e.g.:
|
276
|
+
|
277
|
+
```ruby
|
278
|
+
" foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
|
279
|
+
ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
|
280
|
+
```
|
281
|
+
|
282
|
+
It also doesn't apply if a custom default-delimiter is defined:
|
283
|
+
|
284
|
+
```ruby
|
285
|
+
ss = StringSplitter.new(default_delimiter: /\s+/)
|
286
|
+
ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
|
287
|
+
```
|
288
|
+
|
289
|
+
# COMPATIBILITY
|
290
|
+
|
291
|
+
StringSplitter is tested and supported on all versions of Ruby [supported by
|
292
|
+
the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
|
293
|
+
currently, Ruby 2.5 and above.
|
294
|
+
|
180
295
|
# VERSION
|
181
296
|
|
182
|
-
0.
|
297
|
+
0.6.0
|
183
298
|
|
184
299
|
# SEE ALSO
|
185
300
|
|
@@ -197,7 +312,7 @@ ss.split(line, at: [1..5, 8])
|
|
197
312
|
|
198
313
|
# COPYRIGHT AND LICENSE
|
199
314
|
|
200
|
-
Copyright © 2018 by chocolateboy.
|
315
|
+
Copyright © 2018-2020 by chocolateboy.
|
201
316
|
|
202
317
|
This is free software; you can redistribute it and/or modify it under the
|
203
|
-
terms of the [Artistic License 2.0](
|
318
|
+
terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
|
data/lib/string_splitter.rb
CHANGED
@@ -1,204 +1,354 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'set'
|
3
4
|
require 'values'
|
5
|
+
require_relative 'string_splitter/version'
|
4
6
|
|
5
7
|
# This class extends the functionality of +String#split+ by:
|
6
8
|
#
|
7
9
|
# - providing full control over which splits are accepted or rejected
|
10
|
+
#
|
8
11
|
# - adding support for splitting from right-to-left
|
9
|
-
#
|
10
|
-
#
|
12
|
+
#
|
13
|
+
# - encapsulating splitting options/preferences in the splitter rather
|
14
|
+
# than trying to cram them into overloaded method parameters
|
11
15
|
#
|
12
16
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
13
|
-
# guns e.g. regex matching or parsing.
|
17
|
+
# guns, e.g. regex matching or parsing.
|
18
|
+
#
|
19
|
+
# Implementation-wise, we effectively use the built-in +String#split+ method as a
|
20
|
+
# tokenizer, and parse the resulting tokens into an array of Split objects with the
|
21
|
+
# following fields:
|
22
|
+
#
|
23
|
+
# - captures: separator substrings captured by parentheses in the delimiter pattern
|
24
|
+
# - count: the number of splits
|
25
|
+
# - index: the 0-based index of the split in the array
|
26
|
+
# - lhs: the string to the left of the separator (back to the previous split candidate)
|
27
|
+
# - position: the 1-based index of the split in the array (alias: pos)
|
28
|
+
# - rhs: the string to the right of the separator (up to the next split candidate)
|
29
|
+
# - rindex: the 0-based index of the split relative to the end of the array
|
30
|
+
# - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
|
31
|
+
# - separator: the string matched by the delimiter pattern/string
|
32
|
+
#
|
14
33
|
class StringSplitter
|
15
|
-
|
16
|
-
|
17
|
-
|
34
|
+
# terminology: the delimiter is what we provide and the separators are what we get
|
35
|
+
# back (if we capture them). e.g. for:
|
36
|
+
#
|
37
|
+
# ss.split("foo:bar::baz", /(\W+)/)
|
38
|
+
#
|
39
|
+
# the delimiter is /(\W+)/ and the separators are ":" and "::"
|
40
|
+
|
41
|
+
ACCEPT_ALL = ->(_split) { true }
|
42
|
+
DEFAULT_DELIMITER = /\s+/.freeze
|
18
43
|
|
19
44
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
20
45
|
def position
|
21
46
|
index + 1
|
22
47
|
end
|
23
48
|
|
24
|
-
alias_method :offset, :index
|
25
49
|
alias_method :pos, :position
|
50
|
+
|
51
|
+
# 0-based index relative to the end of the array, e.g. for 5 items:
|
52
|
+
#
|
53
|
+
# index | rindex
|
54
|
+
# ------|-------
|
55
|
+
# 0 | 4
|
56
|
+
# 1 | 3
|
57
|
+
# 2 | 2
|
58
|
+
# 3 | 1
|
59
|
+
# 4 | 0
|
60
|
+
def rindex
|
61
|
+
count - position
|
62
|
+
end
|
63
|
+
|
64
|
+
# 1-based position relative to the end of the array, e.g. for 5 items:
|
65
|
+
#
|
66
|
+
# position | rposition
|
67
|
+
# ----------|----------
|
68
|
+
# 1 | 5
|
69
|
+
# 2 | 4
|
70
|
+
# 3 | 3
|
71
|
+
# 4 | 2
|
72
|
+
# 5 | 1
|
73
|
+
def rposition
|
74
|
+
count + 1 - position
|
75
|
+
end
|
76
|
+
|
77
|
+
alias_method :rpos, :rposition
|
26
78
|
end
|
27
79
|
|
80
|
+
# simulate an enum. the value is returned by the case statement
|
81
|
+
# in the generated block if the positions match
|
82
|
+
module Action
|
83
|
+
SELECT = true
|
84
|
+
REJECT = false
|
85
|
+
end
|
86
|
+
|
87
|
+
private_constant :Action
|
88
|
+
|
28
89
|
def initialize(
|
29
90
|
default_delimiter: DEFAULT_DELIMITER,
|
30
91
|
include_captures: true,
|
31
|
-
remove_empty: false,
|
92
|
+
remove_empty: false, # TODO remove this
|
93
|
+
remove_empty_fields: remove_empty,
|
32
94
|
spread_captures: true
|
33
95
|
)
|
34
96
|
@default_delimiter = default_delimiter
|
35
97
|
@include_captures = include_captures
|
36
|
-
@
|
98
|
+
@remove_empty_fields = remove_empty_fields
|
37
99
|
@spread_captures = spread_captures
|
38
100
|
end
|
39
101
|
|
40
|
-
attr_reader
|
41
|
-
|
42
|
-
|
43
|
-
|
102
|
+
attr_reader(
|
103
|
+
:default_delimiter,
|
104
|
+
:include_captures,
|
105
|
+
:remove_empty_fields,
|
106
|
+
:spread_captures
|
107
|
+
)
|
44
108
|
|
45
|
-
|
46
|
-
|
109
|
+
# TODO remove this
|
110
|
+
alias remove_empty remove_empty_fields
|
111
|
+
|
112
|
+
def split(
|
113
|
+
string,
|
114
|
+
delimiter = @default_delimiter,
|
115
|
+
at: nil, # alias for select
|
116
|
+
except: nil, # alias for reject
|
117
|
+
select: at,
|
118
|
+
reject: except,
|
119
|
+
&block
|
120
|
+
)
|
121
|
+
result, splits, count, accept = init(
|
122
|
+
string: string,
|
123
|
+
delimiter: delimiter,
|
124
|
+
select: select,
|
125
|
+
reject: reject,
|
126
|
+
block: block
|
127
|
+
)
|
128
|
+
|
129
|
+
return result unless splits
|
130
|
+
|
131
|
+
splits.each_with_index do |hash, index|
|
132
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
47
133
|
result << split.lhs if result.empty?
|
48
134
|
|
49
|
-
if
|
50
|
-
|
51
|
-
if @spread_captures
|
52
|
-
result += split.captures
|
53
|
-
else
|
54
|
-
result << split.captures
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
result << split.rhs
|
135
|
+
if accept.call(split)
|
136
|
+
result << split.captures << split.rhs
|
59
137
|
else
|
60
138
|
# append the rhs
|
61
139
|
result[-1] = result[-1] + split.separator + split.rhs
|
62
140
|
end
|
63
141
|
end
|
64
142
|
|
65
|
-
result
|
143
|
+
render(result)
|
66
144
|
end
|
67
145
|
|
68
146
|
alias lsplit split
|
69
147
|
|
70
|
-
def rsplit(
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
148
|
+
def rsplit(
|
149
|
+
string,
|
150
|
+
delimiter = @default_delimiter,
|
151
|
+
at: nil, # alias for select
|
152
|
+
except: nil, # alias for reject
|
153
|
+
select: at,
|
154
|
+
reject: except,
|
155
|
+
&block
|
156
|
+
)
|
157
|
+
result, splits, count, accept = init(
|
158
|
+
string: string,
|
159
|
+
delimiter: delimiter,
|
160
|
+
select: select,
|
161
|
+
reject: reject,
|
162
|
+
block: block
|
163
|
+
)
|
164
|
+
|
165
|
+
return result unless splits
|
166
|
+
|
167
|
+
splits.reverse_each.with_index do |hash, index|
|
168
|
+
split = Split.with(hash.merge({ count: count, index: index }))
|
75
169
|
result.unshift(split.rhs) if result.empty?
|
76
170
|
|
77
|
-
if
|
78
|
-
|
79
|
-
|
80
|
-
result = split.captures + result
|
81
|
-
else
|
82
|
-
result.unshift(split.captures)
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
result.unshift(split.lhs)
|
171
|
+
if accept.call(split)
|
172
|
+
# [lhs + captures] + result
|
173
|
+
result.unshift(split.lhs, split.captures)
|
87
174
|
else
|
88
175
|
# prepend the lhs
|
89
176
|
result[0] = split.lhs + split.separator + result[0]
|
90
177
|
end
|
91
178
|
end
|
92
179
|
|
93
|
-
result
|
180
|
+
render(result)
|
94
181
|
end
|
95
182
|
|
96
183
|
private
|
97
184
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
185
|
+
# initialisation common to +split+ and +rsplit+
|
186
|
+
#
|
187
|
+
# takes a hash of options passed to +split+ or +rsplit+ and returns an array with
|
188
|
+
# the following fields:
|
189
|
+
#
|
190
|
+
# - result: the array of separated strings to return from +split+ or +rsplit+.
|
191
|
+
# if the splits array is empty, the caller returns this array immediately
|
192
|
+
# without any further processing
|
193
|
+
#
|
194
|
+
# - splits: an array of hashes containing the lhs, rhs, separator and captured
|
195
|
+
# separator substrings for each split
|
196
|
+
#
|
197
|
+
# - count: the number of splits
|
198
|
+
#
|
199
|
+
# - accept: a proc whose return value determines whether each split should be
|
200
|
+
# accepted (true) or rejected (false)
|
201
|
+
#
|
202
|
+
def init(string:, delimiter:, select:, reject:, block:)
|
203
|
+
if delimiter.equal?(DEFAULT_DELIMITER)
|
204
|
+
string = string.strip
|
205
|
+
end
|
117
206
|
|
118
|
-
|
119
|
-
|
207
|
+
if reject
|
208
|
+
positions = reject
|
209
|
+
action = Action::REJECT
|
210
|
+
elsif select
|
211
|
+
positions = select
|
212
|
+
action = Action::SELECT
|
213
|
+
end
|
120
214
|
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
}
|
215
|
+
splits = parse(string, delimiter)
|
216
|
+
|
217
|
+
if splits.empty?
|
218
|
+
result = string.empty? ? [] : [string]
|
219
|
+
return [result]
|
127
220
|
end
|
128
221
|
|
129
|
-
|
222
|
+
block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
|
223
|
+
[[], splits, splits.length, block]
|
130
224
|
end
|
131
225
|
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
result = (@remove_empty && string.empty?) ? [] : [string]
|
136
|
-
return [result, block, NO_SPLITS, 0, -1]
|
226
|
+
def render(result)
|
227
|
+
if @remove_empty_fields
|
228
|
+
result.reject! { |it| it.is_a?(String) && it.empty? }
|
137
229
|
end
|
138
230
|
|
139
|
-
|
140
|
-
|
141
|
-
if delimiter.is_a?(Regexp) && ncaptures > 0
|
142
|
-
# increment back-references so they remain valid when the outer capture
|
143
|
-
# is added e.g. to split on:
|
144
|
-
#
|
145
|
-
# - <foo-comment> ... </foo-comment>
|
146
|
-
# - <bar-comment> ... </bar-comment>
|
147
|
-
#
|
148
|
-
# etc.
|
149
|
-
#
|
150
|
-
# before:
|
151
|
-
#
|
152
|
-
# %r| <(\w+-comment)> [^<]* </\1> |x
|
153
|
-
#
|
154
|
-
# after:
|
155
|
-
#
|
156
|
-
# %r| ( <(\w+-comment)> [^<]* </\2> ) |x
|
157
|
-
|
158
|
-
delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
|
159
|
-
match = Regexp.last_match
|
160
|
-
match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
|
161
|
-
end
|
231
|
+
unless @include_captures
|
232
|
+
return result.reject! { |it| it.is_a?(Array) }
|
162
233
|
end
|
163
234
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
235
|
+
result.flat_map do |value|
|
236
|
+
next [value] unless value.is_a?(Array) && @spread_captures
|
237
|
+
@spread_captures == :compact ? value.compact : value
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
# takes a string and a delimiter pattern (regex or string) and splits it along
|
242
|
+
# the delimiter, returning an array of objects (hashes) representing each split.
|
243
|
+
# e.g. for:
|
244
|
+
#
|
245
|
+
# parse("foo:bar:baz:quux", ":")
|
246
|
+
#
|
247
|
+
# we return:
|
248
|
+
#
|
249
|
+
# [
|
250
|
+
# { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
|
251
|
+
# { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
|
252
|
+
# { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
|
253
|
+
# ]
|
254
|
+
#
|
255
|
+
def parse(string, pattern)
|
256
|
+
result = []
|
257
|
+
start = 0
|
258
|
+
|
259
|
+
# we don't use the argument passed to the +scan+ block here because it's a
|
260
|
+
# string (the separator) if there are no captures, rather than an empty
|
261
|
+
# array. we use match.captures instead to get the array
|
262
|
+
string.scan(pattern) do
|
263
|
+
match = Regexp.last_match
|
264
|
+
index, after = match.offset(0)
|
265
|
+
separator = match[0]
|
266
|
+
|
267
|
+
# ignore empty separators at the beginning and/or end of the string
|
268
|
+
next if separator.empty? && (index.zero? || after == string.length)
|
269
|
+
|
270
|
+
lhs = string.slice(start, index - start)
|
271
|
+
result.last[:rhs] = lhs unless result.empty?
|
272
|
+
|
273
|
+
# this is correct for the last/only match, but gets updated to the next
|
274
|
+
# match's lhs for other matches
|
275
|
+
rhs = match.post_match
|
276
|
+
|
277
|
+
result << {
|
278
|
+
captures: match.captures,
|
279
|
+
lhs: lhs,
|
280
|
+
rhs: rhs,
|
281
|
+
separator: separator,
|
282
|
+
}
|
283
|
+
|
284
|
+
# move the start index (the start of the lhs) to the index after the last
|
285
|
+
# character of the separator
|
286
|
+
start = after
|
287
|
+
end
|
288
|
+
|
289
|
+
result
|
290
|
+
end
|
193
291
|
|
194
|
-
|
195
|
-
|
292
|
+
# returns a lambda which splits at (i.e. accepts or rejects splits at, depending
|
293
|
+
# on the action) the supplied positions
|
294
|
+
#
|
295
|
+
# positions are preprocessed to support an additional feature: negative indices
|
296
|
+
# are translated to 1-based non-negative indices, e.g:
|
297
|
+
#
|
298
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
299
|
+
#
|
300
|
+
# translates to:
|
301
|
+
#
|
302
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
303
|
+
#
|
304
|
+
# and
|
305
|
+
#
|
306
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
|
307
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: [-3..])
|
308
|
+
#
|
309
|
+
# translate to:
|
310
|
+
#
|
311
|
+
# ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
|
312
|
+
#
|
313
|
+
def compile(positions, action, nsplits)
|
314
|
+
# XXX note: we don't use modulo, because we don't want
|
315
|
+
# out-of-bounds indices to silently work, e.g. we don't want:
|
316
|
+
#
|
317
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
318
|
+
#
|
319
|
+
# to mysteriously match when the index/position is 0/1
|
320
|
+
#
|
321
|
+
resolve = ->(int) { int.negative? ? nsplits + 1 + int : int }
|
322
|
+
|
323
|
+
# don't use Array(...) to wrap these as we don't want to convert ranges
|
324
|
+
positions = positions.is_a?(Array) ? positions : [positions]
|
325
|
+
|
326
|
+
positions = positions.map do |position|
|
327
|
+
if position.is_a?(Integer)
|
328
|
+
resolve[position]
|
329
|
+
elsif position.is_a?(Range)
|
330
|
+
rbegin = position.begin
|
331
|
+
rend = position.end
|
332
|
+
rexc = position.exclude_end?
|
333
|
+
|
334
|
+
if rbegin.nil?
|
335
|
+
Range.new(1, resolve[rend], rexc)
|
336
|
+
elsif rend.nil?
|
337
|
+
Range.new(resolve[rbegin], nsplits, rexc)
|
338
|
+
elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
|
339
|
+
from = resolve[rbegin]
|
340
|
+
to = resolve[rend]
|
341
|
+
to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
|
342
|
+
else
|
343
|
+
position
|
196
344
|
end
|
345
|
+
elsif position.is_a?(Set)
|
346
|
+
position.map { |it| resolve[it] }.to_set
|
197
347
|
else
|
198
|
-
|
348
|
+
position
|
199
349
|
end
|
200
350
|
end
|
201
351
|
|
202
|
-
|
352
|
+
->(split) { case split.position when *positions then action else !action end }
|
203
353
|
end
|
204
354
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|
@@ -30,42 +30,42 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1
|
33
|
+
version: '2.1'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1
|
40
|
+
version: '2.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '5.
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '5.
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: minitest-power_assert
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.3
|
61
|
+
version: '0.3'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.3
|
68
|
+
version: '0.3'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: minitest-reporters
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -86,29 +86,15 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '13.0'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
-
|
98
|
-
name: rubocop
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: 0.54.0
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: 0.54.0
|
111
|
-
description:
|
96
|
+
version: '13.0'
|
97
|
+
description:
|
112
98
|
email: chocolate@cpan.org
|
113
99
|
executables: []
|
114
100
|
extensions: []
|
@@ -127,7 +113,7 @@ metadata:
|
|
127
113
|
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
128
114
|
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
129
115
|
source_code_uri: https://github.com/chocolateboy/string_splitter
|
130
|
-
post_install_message:
|
116
|
+
post_install_message:
|
131
117
|
rdoc_options: []
|
132
118
|
require_paths:
|
133
119
|
- lib
|
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
121
|
requirements:
|
136
122
|
- - ">="
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
124
|
+
version: '2.3'
|
139
125
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
126
|
requirements:
|
141
127
|
- - ">="
|
142
128
|
- !ruby/object:Gem::Version
|
143
129
|
version: '0'
|
144
130
|
requirements: []
|
145
|
-
|
146
|
-
|
147
|
-
signing_key:
|
131
|
+
rubygems_version: 3.1.4
|
132
|
+
signing_key:
|
148
133
|
specification_version: 4
|
149
134
|
summary: String#split on steroids
|
150
135
|
test_files: []
|