string_splitter 0.3.1 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
4
- data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
3
+ metadata.gz: 400534de6c3143ef81b2ad46a3a6432b7d83ef0900024ebdde3f06a4e1714890
4
+ data.tar.gz: 643f5af7b9e13321dfa97b045b124d0c5ea576868b13141c264122bc96baea5e
5
5
  SHA512:
6
- metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
7
- data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
6
+ metadata.gz: 35bed8fe69b33314813fbd68a8da0e8f4799b7891275ac601b157caeb0e0a3780f37ec7e7876d808b8dfcbfdf7527f45c3af0dc0d679e133865e96949a1d9ce3
7
+ data.tar.gz: 8186e40d57654daf1a481ab74c128910f7aa346bc343a0a9933dc39b7cceeb204c1a55ac39b39321df46f7d02420fd87f93dd4a708be0a985d94833df018da87
@@ -1,22 +1,81 @@
1
+ ## 0.7.0 - 2020-08-21
2
+
3
+ #### Breaking Changes
4
+
5
+ - `String#split` incompatibility: we no longer trim the string (with
6
+ `String#strip`) before splitting if the delimiter is omitted
7
+
8
+ ## 0.6.0 - 2020-08-20
9
+
10
+ #### Breaking Changes
11
+
12
+ - `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
13
+ unlike Ruby's `String#split`, the former no longer strips the string before
14
+ splitting
15
+ - rename the `remove_empty` option `remove_empty_fields`
16
+ - rename the `exclude` option `except` (alias for `reject`)
17
+
18
+ #### Features
19
+
20
+ - add support for descending, negative, and infinite ranges,
21
+ e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
22
+
23
+ #### Fixes
24
+
25
+ - correctly handle backreferences in delimiter patterns
26
+
27
+ ## 0.5.1 - 2018-07-01
28
+
29
+ #### Changes
30
+
31
+ - set StringSplitter::VERSION when `string_splitter.rb` is loaded
32
+
33
+ ## 0.5.0 - 2018-06-26
34
+
35
+ #### Features
36
+
37
+ - add a `reject`/`exclude` option which rejects splits at the specified positions
38
+ - add a `select` alias for `at`
39
+
40
+ #### Fixes
41
+
42
+ - don't treat string delimiters as patterns
43
+
44
+ ## 0.4.0 - 2018-06-24
45
+
46
+ #### Breaking Changes
47
+
48
+ - remove the `offset` alias for `split.index`
49
+
1
50
  ## 0.3.1 - 2018-06-24
2
51
 
3
- - remove trailing empty field when the separator is empty (#1)
52
+ #### Fixes
53
+
54
+ - remove trailing empty field when the separator is empty
55
+ ([#1](https://github.com/chocolateboy/string_splitter/issues/1))
4
56
 
5
57
  ## 0.3.0 - 2018-06-23
6
58
 
7
- - **breaking change**: rename the `default_separator` option to `default_delimiter`
8
- - to avoid ambiguity in the code, refer to the input pattern/string as the
9
- "delimiter" and the matched string as the "separator"
59
+ #### Breaking Changes
60
+
61
+ - rename the `default_separator` option `default_delimiter`
10
62
 
11
63
  ## 0.2.0 - 2018-06-22
12
64
 
13
- - **breaking change**: make `index` (AKA `offset`) 0-based and add `position`
14
- (AKA `pos`) as the 1-based accessor
65
+ #### Breaking Changes
66
+
67
+ - make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
68
+ 1-based accessor
15
69
 
16
70
  ## 0.1.0 - 2018-06-22
17
71
 
18
- - **breaking change**: the block now takes a single `split` object with an
19
- `index` accessor, rather than seperate `index` and `split` arguments
72
+ #### Breaking Changes
73
+
74
+ - the block now takes a single `split` object with an `index` accessor, rather
75
+ than separate `index` and `split` arguments
76
+
77
+ #### Features
78
+
20
79
  - add support for negative indices in the value supplied to the `at` option
21
80
  - add a `count` field to the split object containing the total number of splits
22
81
 
data/README.md CHANGED
@@ -3,14 +3,16 @@
3
3
  [![Build Status](https://travis-ci.org/chocolateboy/string_splitter.svg)](https://travis-ci.org/chocolateboy/string_splitter)
4
4
  [![Gem Version](https://img.shields.io/gem/v/string_splitter.svg)](https://rubygems.org/gems/string_splitter)
5
5
 
6
- <!-- START doctoc generated TOC please keep comment here to allow auto update -->
7
- <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
6
+ <!-- toc -->
8
7
 
9
8
  - [NAME](#name)
10
9
  - [INSTALLATION](#installation)
11
10
  - [SYNOPSIS](#synopsis)
12
11
  - [DESCRIPTION](#description)
13
12
  - [WHY?](#why)
13
+ - [CAVEATS](#caveats)
14
+ - [Differences from String#split](#differences-from-string%23split)
15
+ - [COMPATIBILITY](#compatibility)
14
16
  - [VERSION](#version)
15
17
  - [SEE ALSO](#see-also)
16
18
  - [Gems](#gems)
@@ -18,7 +20,7 @@
18
20
  - [AUTHOR](#author)
19
21
  - [COPYRIGHT AND LICENSE](#copyright-and-license)
20
22
 
21
- <!-- END doctoc generated TOC please keep comment here to allow auto update -->
23
+ <!-- tocstop -->
22
24
 
23
25
  # NAME
24
26
 
@@ -36,65 +38,128 @@ gem "string_splitter"
36
38
  require "string_splitter"
37
39
 
38
40
  ss = StringSplitter.new
41
+ ```
42
+
43
+ **Same as `String#split`**
39
44
 
40
- # same as String#split
41
- ss.split("foo bar baz quux")
42
- ss.split("foo bar baz quux", " ")
43
- ss.split("foo bar baz quux", /\s+/)
44
- # => ["foo", "bar", "baz", "quux"]
45
+ ```ruby
46
+ ss.split("foo bar baz")
47
+ ss.split("foo bar baz", " ")
48
+ ss.split("foo bar baz", /\s+/)
49
+ # => ["foo", "bar", "baz"]
50
+
51
+ ss.split("foo", "")
52
+ ss.split("foo", //)
53
+ # => ["f", "o", "o"]
54
+
55
+ ss.split("", "...")
56
+ ss.split("", /.../)
57
+ # => []
58
+ ```
45
59
 
46
- # split at the first delimiter
60
+ **Split at the first delimiter**
61
+
62
+ ```ruby
47
63
  ss.split("foo:bar:baz:quux", ":", at: 1)
64
+ ss.split("foo:bar:baz:quux", ":", select: 1)
48
65
  # => ["foo", "bar:baz:quux"]
66
+ ```
49
67
 
50
- # split at the last delimiter
68
+ **Split at the last delimiter**
69
+
70
+ ```ruby
51
71
  ss.split("foo:bar:baz:quux", ":", at: -1)
52
72
  # => ["foo:bar:baz", "quux"]
73
+ ```
74
+
75
+ **Split at multiple delimiter positions**
76
+
77
+ ```ruby
78
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
79
+ # => ["1", "2", "3", "4:5:6:7:8", "9"]
80
+ ```
53
81
 
54
- # split at multiple delimiter positions
55
- ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
56
- # => ["1", "2", "3", "4:5:6:7", "8:9"]
82
+ **Split at all but the first and last delimiters**
57
83
 
58
- # split from the right
84
+ ```ruby
85
+ ss.split("1:2:3:4:5:6", ":", except: [1, -1])
86
+ ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
87
+ # => ["1:2", "3", "4", "5:6"]
88
+ ```
89
+
90
+ **Split from the right**
91
+
92
+ ```ruby
59
93
  ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
60
94
  # => ["1:2:3:4", "5:6", "7", "8", "9"]
95
+ ```
96
+
97
+ **Split with negative, descending, and infinite ranges**
98
+
99
+ ```ruby
100
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
101
+ # => ["1", "2", "3", "4", "5", "6", "7:8:9"]
102
+
103
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
104
+ # => ["1:2:3:4", "5", "6", "7", "8:9"]
105
+
106
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
107
+ # => ["1", "2:3", "4", "5", "6:7", "8", "9"]
108
+ ```
109
+
110
+ **Full control via a block**
61
111
 
62
- # full control via a block
63
- result = ss.split('a:a:a:b:c:c:e:a:a:d:c', ":") do |split|
64
- split.index > 0 && split.lhs == split.rhs
112
+ ```ruby
113
+ result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
114
+ split.pos % 2 == 0
65
115
  end
66
- # => ["a:a", "a:b:c", "c:e:a", "a:d:c"]
116
+ # => ["1:2", "3:4", "5:6", "7:8"]
117
+ ```
118
+
119
+ ```ruby
120
+ string = "banana".chars.sort.join # "aaabnn"
121
+
122
+ ss.split(string, "") do |split|
123
+ split.rhs != split.lhs
124
+ end
125
+ # => ["aaa", "b", "nn"]
67
126
  ```
68
127
 
69
128
  # DESCRIPTION
70
129
 
71
- Many languages have built-in string `split` functions/methods. They behave similarly
72
- (notwithstanding the occasional [surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)),
73
- and handle a few common cases e.g.:
130
+ Many languages have built-in `split` functions/methods for strings. They behave
131
+ similarly (notwithstanding the occasional
132
+ [surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
133
+ handle a few common cases e.g.:
74
134
 
75
135
  * limiting the number of splits
76
- * including the separators in the results
136
+ * including the separator(s) in the results
77
137
  * removing (some) empty fields
78
138
 
79
- But, because the API is squeezed into two overloaded parameters (the delimiter and the limit),
80
- achieving the desired effects can be tricky. For instance, while `String#split` removes empty
81
- trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
82
- cramped API means there's no way to e.g. combine a limit (positive integer) with the option
83
- to preserve empty fields (negative integer), or use backreferences in a delimiter pattern
139
+ But, because the API is squeezed into two overloaded parameters (the delimiter
140
+ and the limit), achieving the desired results can be tricky. For instance,
141
+ while `String#split` removes empty trailing fields (by default), it provides no
142
+ way to remove *all* empty fields. Likewise, the cramped API means there's no
143
+ way to e.g. combine a limit (positive integer) with the option to preserve
144
+ empty fields (negative integer), or use backreferences in a delimiter pattern
84
145
  without including its captured subexpressions in the result.
85
146
 
86
- If `split` was being written from scratch, without the baggage of its legacy API,
87
- it's possible that some of these options would be made explicit rather than overloading
88
- the parameters. And, indeed, this is possible in some implementations,
89
- e.g. in Crystal:
147
+ If `split` was being written from scratch, without the baggage of its legacy
148
+ API, it's possible that some of these options would be made explicit rather
149
+ than overloading the parameters. And, indeed, this is possible in some
150
+ implementations, e.g. in Crystal:
90
151
 
91
152
  ```ruby
92
- ":foo:bar:baz:".split(":", remove_empty: false) # => ["", "foo", "bar", "baz", ""]
93
- ":foo:bar:baz:".split(":", remove_empty: true) # => ["foo", "bar", "baz"]
153
+ ":foo:bar:baz:".split(":", remove_empty: false)
154
+ # => ["", "foo", "bar", "baz", ""]
155
+
156
+ ":foo:bar:baz:".split(":", remove_empty: true)
157
+ # => ["foo", "bar", "baz"]
94
158
  ````
95
159
 
96
- StringSplitter takes this one step further by moving the configuration out of the method altogether
97
- and delegating the strategy — i.e. which splits should be accepted or rejected — to a block:
160
+ StringSplitter takes this one step further by moving the configuration out of
161
+ the method altogether and delegating the strategy — i.e. which splits should be
162
+ accepted or rejected — to a block:
98
163
 
99
164
  ```ruby
100
165
  ss = StringSplitter.new
@@ -102,22 +167,32 @@ ss = StringSplitter.new
102
167
  ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
103
168
  # => ["foo", "bar:baz"]
104
169
 
105
- ss.split("foo:bar:baz", ":") { |split| split.position == split.count }
106
- # => ["foo:bar", "baz"]
170
+ ss.split("foo:bar:baz:quux", ":") do |split|
171
+ split.position == 1 || split.position == 3
172
+ end
173
+ # => ["foo", "bar:baz", "quux"]
107
174
  ```
108
175
 
109
- As a shortcut, the common case of splitting on delimiters at one or more positions is supported by an option:
176
+ As a shortcut, the common case of splitting (or not splitting) at one or more
177
+ positions is supported by dedicated options:
110
178
 
111
179
  ```ruby
112
- ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
180
+ ss.split("foo:bar:baz:quux", ":", select: [1, -1])
181
+ # => ["foo", "bar:baz", "quux"]
182
+
183
+ ss.split("foo:bar:baz:quux", ":", reject: [1, -1])
184
+ # => ["foo:bar", "baz:quux"]
113
185
  ```
114
186
 
115
187
  # WHY?
116
188
 
117
- I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
189
+ I wanted to split semi-structured output into fields without having to resort
190
+ to a regex or a full-blown parser.
118
191
 
119
- As an example, the nominally unstructured output of many Unix commands is often formatted in a way
120
- that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
192
+ As an example, the nominally unstructured output of many Unix commands is often
193
+ formatted in a way that's tantalizingly close to being
194
+ [machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
195
+ apart from a few pesky exceptions e.g.:
121
196
 
122
197
  ```bash
123
198
  $ ls -l
@@ -129,8 +204,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
129
204
  -rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
130
205
  ```
131
206
 
132
- These lines can *almost* be parsed into an array of fields by splitting them on whitespace. The exception is the
133
- date (columns 6-8) i.e.:
207
+ These lines can *almost* be parsed into an array of fields by splitting them on
208
+ whitespace. The exception is the date (columns 6-8) i.e.:
134
209
 
135
210
  ```ruby
136
211
  line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
@@ -155,13 +230,14 @@ One way to work around this is to parse the whole line e.g.:
155
230
  line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
156
231
  ```
157
232
 
158
- But that requires us to specify *everything*. What we really want is a version of `split`
159
- which allows us to veto splitting for the 6th and 7th delimiters i.e. control over which
160
- splits are accepted, rather than being restricted to the single, baked-in strategy provided
161
- by the `limit` parameter.
233
+ But that requires us to specify *everything*. What we really want is a version
234
+ of `split` which allows us to veto splitting for the 6th and 7th delimiters
235
+ (and to stop after the 8th delimiter) i.e. control over which splits are
236
+ accepted, rather than being restricted to the single, baked-in strategy
237
+ provided by the `limit` parameter.
162
238
 
163
- By providing a simple way to accept or reject each split, StringSplitter makes cases like
164
- this easy to handle, either via a block:
239
+ By providing a simple way to accept or reject each split, StringSplitter makes
240
+ cases like this easy to handle, either via a block:
165
241
 
166
242
  ```ruby
167
243
  ss.split(line) do |split|
@@ -177,9 +253,51 @@ ss.split(line, at: [1..5, 8])
177
253
  # => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
178
254
  ```
179
255
 
256
+ # CAVEATS
257
+
258
+ ## Differences from String#split
259
+
260
+ Unlike `String#split`, StringSplitter doesn't trim the string before splitting
261
+ (with `String#strip`) if the delimiter is omitted or a single space, e.g.:
262
+
263
+ ```ruby
264
+ " foo bar baz ".split # => ["foo", "bar", "baz"]
265
+ " foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
266
+
267
+ ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
268
+ ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
269
+ ```
270
+
271
+ `String#split` omits the `nil` values of unmatched optional captures:
272
+
273
+ ```ruby
274
+ "foo:bar:baz".scan(/(:)|(-)/) # => [[":", nil], [":", nil]]
275
+ "foo:bar:baz".split(/(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
276
+ ```
277
+
278
+ StringSplitter preserves them by default (if `include_captures` is true, as it
279
+ is by default), though they can be omitted from spread captures by passing
280
+ `:compact` as the value of the `spread_captures` option:
281
+
282
+ ```ruby
283
+ s1 = StringSplitter.new(spread_captures: true)
284
+ s2 = StringSplitter.new(spread_captures: false)
285
+ s3 = StringSplitter.new(spread_captures: :compact)
286
+
287
+ s1.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", nil, "bar", ":", nil, "baz"]
288
+ s2.split("foo:bar:baz", /(:)|(-)/) # => ["foo", [":", nil], "bar", [":", nil], "baz"]
289
+ s3.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
290
+ ```
291
+
292
+ # COMPATIBILITY
293
+
294
+ StringSplitter is tested and supported on all versions of Ruby [supported by
295
+ the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
296
+ currently, Ruby 2.5 and above.
297
+
180
298
  # VERSION
181
299
 
182
- 0.3.1
300
+ 0.7.0
183
301
 
184
302
  # SEE ALSO
185
303
 
@@ -197,7 +315,7 @@ ss.split(line, at: [1..5, 8])
197
315
 
198
316
  # COPYRIGHT AND LICENSE
199
317
 
200
- Copyright © 2018 by chocolateboy.
318
+ Copyright © 2018-2020 by chocolateboy.
201
319
 
202
320
  This is free software; you can redistribute it and/or modify it under the
203
- terms of the [Artistic License 2.0](http://www.opensource.org/licenses/artistic-license-2.0.php).
321
+ terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
@@ -1,250 +1,359 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
3
4
  require 'values'
4
5
 
6
+ require_relative 'string_splitter/version'
7
+
5
8
  # This class extends the functionality of +String#split+ by:
6
9
  #
7
10
  # - providing full control over which splits are accepted or rejected
11
+ #
8
12
  # - adding support for splitting from right-to-left
9
- # - encapsulating splitting options/preferences in instances rather than trying to
10
- # cram them into overloaded method parameters
13
+ #
14
+ # - encapsulating splitting options/preferences in the splitter rather
15
+ # than trying to cram them into overloaded method parameters
11
16
  #
12
17
  # These enhancements allow splits to handle many cases that otherwise require bigger
13
- # guns e.g. regex matching or parsing.
18
+ # guns, e.g. regex matching or parsing.
19
+ #
20
+ # Implementation-wise, we split the string with a scanner which works in a similar
21
+ # way to +String#split+ and parse the resulting tokens into an array of Split objects
22
+ # with the following fields:
23
+ #
24
+ # - captures: separator substrings captured by parentheses in the delimiter pattern
25
+ # - count: the number of splits
26
+ # - index: the 0-based index of the split in the array
27
+ # - lhs: the string to the left of the separator (back to the previous split candidate)
28
+ # - position: the 1-based index of the split in the array (alias: pos)
29
+ # - rhs: the string to the right of the separator (up to the next split candidate)
30
+ # - rindex: the 0-based index of the split relative to the end of the array
31
+ # - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
32
+ # - separator: the string matched by the delimiter pattern/string
33
+ #
14
34
  class StringSplitter
15
- ACCEPT = ->(_split) { true }
16
- DEFAULT_DELIMITER = /\s+/
17
- NO_SPLITS = []
35
+ # terminology: the delimiter is what we provide and the separators are what we get
36
+ # back (if we capture them). e.g. for:
37
+ #
38
+ # ss.split("foo:bar::baz", /(\W+)/)
39
+ #
40
+ # the delimiter is /(\W+)/ and the separators are ":" and "::"
41
+
42
+ ACCEPT_ALL = ->(_split) { true }
43
+ DEFAULT_DELIMITER = /\s+/.freeze
44
+ REMOVE = [].freeze
18
45
 
19
46
  Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
20
47
  def position
21
48
  index + 1
22
49
  end
23
50
 
24
- alias_method :offset, :index
25
51
  alias_method :pos, :position
52
+
53
+ # 0-based index relative to the end of the array, e.g. for 5 items:
54
+ #
55
+ # index | rindex
56
+ # ------|-------
57
+ # 0 | 4
58
+ # 1 | 3
59
+ # 2 | 2
60
+ # 3 | 1
61
+ # 4 | 0
62
+ def rindex
63
+ count - position
64
+ end
65
+
66
+ # 1-based position relative to the end of the array, e.g. for 5 items:
67
+ #
68
+ # position | rposition
69
+ # ----------|----------
70
+ # 1 | 5
71
+ # 2 | 4
72
+ # 3 | 3
73
+ # 4 | 2
74
+ # 5 | 1
75
+ def rposition
76
+ count + 1 - position
77
+ end
78
+
79
+ alias_method :rpos, :rposition
80
+ end
81
+
82
+ # simulate an enum. the value is returned by the case statement
83
+ # in the generated block if the positions match
84
+ module Action
85
+ SELECT = true
86
+ REJECT = false
26
87
  end
27
88
 
89
+ private_constant :Action
90
+
28
91
  def initialize(
29
92
  default_delimiter: DEFAULT_DELIMITER,
30
93
  include_captures: true,
31
- remove_empty: false,
94
+ remove_empty: false, # TODO remove this
95
+ remove_empty_fields: remove_empty,
32
96
  spread_captures: true
33
97
  )
34
98
  @default_delimiter = default_delimiter
35
99
  @include_captures = include_captures
36
- @remove_empty = remove_empty
100
+ @remove_empty_fields = remove_empty_fields
37
101
  @spread_captures = spread_captures
38
102
  end
39
103
 
40
- attr_reader :default_delimiter, :include_captures, :remove_empty, :spread_captures
41
-
42
- def split(string, delimiter = @default_delimiter, at: nil, &block)
43
- result, block, splits, count, index = split_common(string, delimiter, at, block)
104
+ attr_reader(
105
+ :default_delimiter,
106
+ :include_captures,
107
+ :remove_empty_fields,
108
+ :spread_captures
109
+ )
44
110
 
45
- splits.each do |split|
46
- split = Split.with(split.merge({ index: (index += 1), count: count }))
111
+ # TODO remove this
112
+ alias remove_empty remove_empty_fields
113
+
114
+ def split(
115
+ string,
116
+ delimiter = @default_delimiter,
117
+ at: nil, # alias for select
118
+ except: nil, # alias for reject
119
+ select: at,
120
+ reject: except,
121
+ &block
122
+ )
123
+ result, splits, count, accept = init(
124
+ string: string,
125
+ delimiter: delimiter,
126
+ select: select,
127
+ reject: reject,
128
+ block: block
129
+ )
130
+
131
+ return result unless splits
132
+
133
+ splits.each_with_index do |hash, index|
134
+ split = Split.with(hash.merge({ count: count, index: index }))
47
135
  result << split.lhs if result.empty?
48
136
 
49
- if block.call(split)
50
- if @include_captures
51
- if @spread_captures
52
- result += split.captures
53
- else
54
- result << split.captures
55
- end
56
- end
57
-
58
- result << split.rhs
137
+ if accept.call(split)
138
+ result << split.captures << split.rhs
59
139
  else
60
140
  # append the rhs
61
141
  result[-1] = result[-1] + split.separator + split.rhs
62
142
  end
63
143
  end
64
144
 
65
- result
145
+ render(result)
66
146
  end
67
147
 
68
148
  alias lsplit split
69
149
 
70
- def rsplit(string, delimiter = @default_delimiter, at: nil, &block)
71
- result, block, splits, count, index = split_common(string, delimiter, at, block)
72
-
73
- splits.reverse!.each do |split|
74
- split = Split.with(split.merge({ index: (index += 1), count: count }))
150
+ def rsplit(
151
+ string,
152
+ delimiter = @default_delimiter,
153
+ at: nil, # alias for select
154
+ except: nil, # alias for reject
155
+ select: at,
156
+ reject: except,
157
+ &block
158
+ )
159
+ result, splits, count, accept = init(
160
+ string: string,
161
+ delimiter: delimiter,
162
+ select: select,
163
+ reject: reject,
164
+ block: block
165
+ )
166
+
167
+ return result unless splits
168
+
169
+ splits.reverse_each.with_index do |hash, index|
170
+ split = Split.with(hash.merge({ count: count, index: index }))
75
171
  result.unshift(split.rhs) if result.empty?
76
172
 
77
- if block.call(split)
78
- if @include_captures
79
- if @spread_captures
80
- result = split.captures + result
81
- else
82
- result.unshift(split.captures)
83
- end
84
- end
85
-
86
- result.unshift(split.lhs)
173
+ if accept.call(split)
174
+ # [lhs + captures] + result
175
+ result.unshift(split.lhs, split.captures)
87
176
  else
88
177
  # prepend the lhs
89
178
  result[0] = split.lhs + split.separator + result[0]
90
179
  end
91
180
  end
92
181
 
93
- result
182
+ render(result)
94
183
  end
95
184
 
96
185
  private
97
186
 
98
- def splits_for(parts, ncaptures)
99
- result = []
100
- splits = []
101
-
102
- until parts.empty?
103
- lhs = parts.shift
104
- separator = parts.shift
105
- captures = parts.shift(ncaptures)
106
- rhs = parts.length == 1 ? parts.shift : parts.first
107
-
108
- if @remove_empty && (lhs.empty? || rhs.empty?)
109
- if lhs.empty? && rhs.empty?
110
- # do nothing
111
- elsif parts.empty? # last split
112
- result << (!lhs.empty? ? lhs : rhs) if splits.empty?
113
- elsif rhs.empty?
114
- # replace the empty rhs with the non-empty lhs
115
- parts[0] = lhs
116
- end
187
+ # initialisation common to +split+ and +rsplit+
188
+ #
189
+ # takes a hash of options passed to +split+ or +rsplit+ and returns a tuple with
190
+ # the following fields:
191
+ #
192
+ # - result: the array of separated strings to return from +split+ or +rsplit+.
193
+ # if the splits array is empty, the caller returns this array immediately
194
+ # without any further processing
195
+ #
196
+ # - splits: an array of hashes containing the lhs, rhs, separator and captured
197
+ # separator substrings for each split
198
+ #
199
+ # - count: the number of splits
200
+ #
201
+ # - accept: a proc whose return value determines whether each split should be
202
+ # accepted (true) or rejected (false)
203
+ #
204
+ def init(string:, delimiter:, select:, reject:, block:)
205
+ if reject
206
+ positions = reject
207
+ action = Action::REJECT
208
+ elsif select
209
+ positions = select
210
+ action = Action::SELECT
211
+ end
117
212
 
118
- next
119
- end
213
+ splits = parse(string, delimiter)
120
214
 
121
- splits << {
122
- lhs: lhs,
123
- rhs: rhs,
124
- separator: separator,
125
- captures: captures,
126
- }
215
+ if splits.empty?
216
+ result = string.empty? ? [] : [string]
217
+ return [result]
127
218
  end
128
219
 
129
- [result, splits]
220
+ block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
221
+ [[], splits, splits.length, block]
130
222
  end
131
223
 
132
- # setup common to both split methods
133
- def split_common(string, delimiter, at, block)
134
- unless (match = string.match(delimiter))
135
- result = (@remove_empty && string.empty?) ? [] : [string]
136
- return [result, block, NO_SPLITS, 0, -1]
224
+ def render(values)
225
+ values.flat_map do |value|
226
+ if value.is_a?(String)
227
+ value.empty? && @remove_empty_fields ? REMOVE : [value]
228
+ elsif @include_captures
229
+ if @spread_captures
230
+ @spread_captures == :compact ? value.compact : value
231
+ elsif value.empty?
232
+ # we expose non-captures (string delimiters or regexps with no
233
+ # captures) as empty arrays inside the block, so the type is
234
+ # consistent, but it doesn't make sense to keep them in the
235
+ # result
236
+ REMOVE
237
+ else
238
+ [value]
239
+ end
240
+ else
241
+ REMOVE
242
+ end
137
243
  end
138
-
139
- ncaptures = match.captures.length
140
- delimiter = increment_backrefs(delimiter, ncaptures)
141
- parts = string.split(/(#{delimiter})/, -1)
142
- remove_trailing_empty_field!(parts, ncaptures)
143
- result, splits = splits_for(parts, ncaptures)
144
- count = splits.length
145
- block ||= at ? match_positions(at, count) : ACCEPT
146
-
147
- [result, block, splits, count, -1]
148
244
  end
149
245
 
150
- # increment back-references so they remain valid when the outer capture
151
- # is added.
152
- #
153
- # e.g. to split on:
246
+ # takes a string and a delimiter pattern (regex or string) and splits it along
247
+ # the delimiter, returning an array of objects (hashes) representing each split.
248
+ # e.g. for:
154
249
  #
155
- # - <foo-comment> ... </foo-comment>
156
- # - <bar-comment> ... </bar-comment>
250
+ # parse("foo:bar:baz:quux", ":")
157
251
  #
158
- # etc.
252
+ # we return:
159
253
  #
160
- # before:
254
+ # [
255
+ # { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
256
+ # { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
257
+ # { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
258
+ # ]
161
259
  #
162
- # %r| <(\w+-comment)> [^<]* </\1> |x
163
- #
164
- # after:
165
- #
166
- # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
260
+ def parse(string, delimiter)
261
+ result = []
262
+ start = 0
167
263
 
168
- def increment_backrefs(delimiter, ncaptures)
169
- if delimiter.is_a?(Regexp) && ncaptures > 0
170
- delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
171
- match = Regexp.last_match
172
- match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
173
- end
264
+ # we don't use the argument passed to the +scan+ block here because it's a
265
+ # string (the separator) if there are no captures, rather than an empty
266
+ # array. we use match.captures instead to get the array
267
+ string.scan(delimiter) do
268
+ match = Regexp.last_match
269
+ index, after = match.offset(0)
270
+ separator = match[0]
271
+
272
+ # ignore empty separators at the beginning and/or end of the string
273
+ next if separator.empty? && (index.zero? || after == string.length)
274
+
275
+ lhs = string.slice(start, index - start)
276
+ result.last[:rhs] = lhs unless result.empty?
277
+
278
+ # this is correct for the last/only match, but gets updated to the next
279
+ # match's lhs for other matches
280
+ rhs = match.post_match
281
+
282
+ result << {
283
+ captures: match.captures,
284
+ lhs: lhs,
285
+ rhs: rhs,
286
+ separator: separator,
287
+ }
288
+
289
+ # move the start index (the start of the next lhs) to the index after the
290
+ # last character of the separator
291
+ start = after
174
292
  end
175
293
 
176
- delimiter
294
+ result
177
295
  end
178
296
 
179
- # work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
180
- # on an empty string/pattern without removing trailing empty fields e.g.:
297
+ # returns a lambda which splits at (i.e. accepts or rejects splits at, depending
298
+ # on the action) the supplied positions
181
299
  #
182
- # "foobar".split("", -1)
183
- # "foobar".split(//, -1)
184
- # # => ["f", "o", "o", "b", "a", "r", ""]
300
+ # positions are preprocessed to support additional features: negative
301
+ # ranges, infinite ranges, and descending ranges, e.g.:
185
302
  #
186
- # "foobar".split(/()/, -1)
187
- # # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
303
+ # ss.split("foo:bar:baz:quux", ":", at: -1)
188
304
  #
189
- # "foobar".split(/(())/, -1)
190
- # # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
305
+ # translates to:
191
306
  #
192
- # *there is no such thing as an empty field whose separator is empty*, so
193
- # if String#split's result ends with an empty separator, 0 or more (empty)
194
- # captures and an empty field, we can safely remove them.
195
-
196
- def remove_trailing_empty_field!(parts, ncaptures)
197
- # the trailing field is at index -1. if there are 0 captures, the separator
198
- # is at -2:
199
- #
200
- # [empty_separator, empty_field]
201
- #
202
- # if there is 1 capture, the separator is at -3:
203
- #
204
- # [empty_separator, capture, empty_field]
307
+ # ss.split("foo:bar:baz:quux", ":", at: 3)
308
+ #
309
+ # and
310
+ #
311
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
312
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..-1)
313
+ #
314
+ # translate to:
315
+ #
316
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
317
+ #
318
+ def compile(positions, action, count)
319
+ # XXX note: we don't use modulo, because we don't want
320
+ # out-of-bounds indices to silently work, e.g. we don't want:
205
321
  #
206
- # etc. therefore we find the separator by walking back
322
+ # ss.split("foo:bar:baz:quux", ":", at: -42)
207
323
  #
208
- # 1 (empty field)
209
- # + ncaptures
210
- # + 1 (separator)
324
+ # to mysteriously match when the index/position is 0/1
211
325
  #
212
- # steps from the end of the array i.e. ncaptures + 2
213
- count = ncaptures + 2
214
- separator_index = count * -1
215
-
216
- return unless parts[-1].empty? && parts[separator_index].empty?
217
-
218
- # drop the empty separator, the (empty) captures, and the trailing empty field
219
- parts.pop(count)
220
- end
221
-
222
- def match_positions(positions, nsplits)
223
- positions = Array(positions).map do |position|
224
- if position.is_a?(Integer) && position.negative?
225
- # translate negative indices to 1-based non-negative indices e.g:
226
- #
227
- # ss.split("foo:bar:baz:quux", ":", at: -1)
228
- #
229
- # translates to:
230
- #
231
- # ss.split("foo:bar:baz:quux", ":", at: 3)
232
- #
233
- # XXX note: we don't use modulo, because we don't want
234
- # out-of-bounds indices to silently work e.g. we don't want:
235
- #
236
- # ss.split("foo:bar:baz:quux", ":", -42)
237
- #
238
- # to mysteriously match when the position is 2
239
-
240
- nsplits + 1 + position
326
+ resolve = ->(int) { int.negative? ? count + 1 + int : int }
327
+
328
+ # don't use Array(...) to wrap these as we don't want to convert ranges
329
+ positions = positions.is_a?(Array) ? positions : [positions]
330
+
331
+ positions = positions.map do |position|
332
+ if position.is_a?(Integer)
333
+ resolve[position]
334
+ elsif position.is_a?(Range)
335
+ rbegin = position.begin
336
+ rend = position.end
337
+ rexc = position.exclude_end?
338
+
339
+ if rbegin.nil?
340
+ Range.new(1, resolve[rend], rexc)
341
+ elsif rend.nil?
342
+ Range.new(resolve[rbegin], count, rexc)
343
+ elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
344
+ from = resolve[rbegin]
345
+ to = resolve[rend]
346
+ to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
347
+ else
348
+ position
349
+ end
350
+ elsif position.is_a?(Set)
351
+ position.map { |it| resolve[it] }.to_set
241
352
  else
242
353
  position
243
354
  end
244
355
  end
245
356
 
246
- lambda do |split|
247
- case split.position when *positions then true else false end
248
- end
357
+ ->(split) { case split.position when *positions then action else !action end }
249
358
  end
250
359
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class StringSplitter
4
- VERSION = '0.3.1'
4
+ VERSION = '0.7.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chocolateboy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-24 00:00:00.000000000 Z
11
+ date: 2020-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: values
@@ -30,42 +30,42 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.16'
33
+ version: '2.1'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.16'
40
+ version: '2.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '5.11'
47
+ version: '5.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '5.11'
54
+ version: '5.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: minitest-power_assert
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.3.0
61
+ version: '0.3'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.3.0
68
+ version: '0.3'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest-reporters
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,29 +86,15 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
97
- - !ruby/object:Gem::Dependency
98
- name: rubocop
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: 0.54.0
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: 0.54.0
111
- description:
96
+ version: '13.0'
97
+ description:
112
98
  email: chocolate@cpan.org
113
99
  executables: []
114
100
  extensions: []
@@ -127,7 +113,7 @@ metadata:
127
113
  bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
128
114
  changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
129
115
  source_code_uri: https://github.com/chocolateboy/string_splitter
130
- post_install_message:
116
+ post_install_message:
131
117
  rdoc_options: []
132
118
  require_paths:
133
119
  - lib
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
135
121
  requirements:
136
122
  - - ">="
137
123
  - !ruby/object:Gem::Version
138
- version: '0'
124
+ version: '2.3'
139
125
  required_rubygems_version: !ruby/object:Gem::Requirement
140
126
  requirements:
141
127
  - - ">="
142
128
  - !ruby/object:Gem::Version
143
129
  version: '0'
144
130
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.7
147
- signing_key:
131
+ rubygems_version: 3.1.4
132
+ signing_key:
148
133
  specification_version: 4
149
134
  summary: String#split on steroids
150
135
  test_files: []