string_splitter 0.3.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 67fd08fc0c1d5928d849206b28130eadedbd7c38755f1c123f4c3d46cbbc5619
4
- data.tar.gz: b102be89d4c59f9a2d3dd4661277a4fd3a31816f7dbae1630f2f6954bedad62a
3
+ metadata.gz: 400534de6c3143ef81b2ad46a3a6432b7d83ef0900024ebdde3f06a4e1714890
4
+ data.tar.gz: 643f5af7b9e13321dfa97b045b124d0c5ea576868b13141c264122bc96baea5e
5
5
  SHA512:
6
- metadata.gz: 87d567793e20367c52625d5fa9dd6cea5470221b3d53bc54d0bd59f0f8835635d81a67e1fcecba5fcce5a116c6ba6c346c4b74fa563ac21bee5ff0d06d07ad8b
7
- data.tar.gz: eab3f78e4c61e77c7bb283eb50e871d665fd4a323913e9fbf525ab2a6bfa05f0ebbabf490284371c00be6690f937f485823a8bc9dc3f59aafc9bff71c8cbe893
6
+ metadata.gz: 35bed8fe69b33314813fbd68a8da0e8f4799b7891275ac601b157caeb0e0a3780f37ec7e7876d808b8dfcbfdf7527f45c3af0dc0d679e133865e96949a1d9ce3
7
+ data.tar.gz: 8186e40d57654daf1a481ab74c128910f7aa346bc343a0a9933dc39b7cceeb204c1a55ac39b39321df46f7d02420fd87f93dd4a708be0a985d94833df018da87
@@ -1,22 +1,81 @@
1
+ ## 0.7.0 - 2020-08-21
2
+
3
+ #### Breaking Changes
4
+
5
+ - `String#split` incompatibility: we no longer trim the string (with
6
+ `String#strip`) before splitting if the delimiter is omitted
7
+
8
+ ## 0.6.0 - 2020-08-20
9
+
10
+ #### Breaking Changes
11
+
12
+ - `ss.split(str, " ")` is no longer treated the same as `ss.split(str)` i.e.
13
+ unlike Ruby's `String#split`, the former no longer strips the string before
14
+ splitting
15
+ - rename the `remove_empty` option `remove_empty_fields`
16
+ - rename the `exclude` option `except` (alias for `reject`)
17
+
18
+ #### Features
19
+
20
+ - add support for descending, negative, and infinite ranges,
21
+ e.g. `ss.split(str, ":", at: [..4, 4..., 3..1, -1..-3])` etc.
22
+
23
+ #### Fixes
24
+
25
+ - correctly handle backreferences in delimiter patterns
26
+
27
+ ## 0.5.1 - 2018-07-01
28
+
29
+ #### Changes
30
+
31
+ - set StringSplitter::VERSION when `string_splitter.rb` is loaded
32
+
33
+ ## 0.5.0 - 2018-06-26
34
+
35
+ #### Features
36
+
37
+ - add a `reject`/`exclude` option which rejects splits at the specified positions
38
+ - add a `select` alias for `at`
39
+
40
+ #### Fixes
41
+
42
+ - don't treat string delimiters as patterns
43
+
44
+ ## 0.4.0 - 2018-06-24
45
+
46
+ #### Breaking Changes
47
+
48
+ - remove the `offset` alias for `split.index`
49
+
1
50
  ## 0.3.1 - 2018-06-24
2
51
 
3
- - remove trailing empty field when the separator is empty (#1)
52
+ #### Fixes
53
+
54
+ - remove trailing empty field when the separator is empty
55
+ ([#1](https://github.com/chocolateboy/string_splitter/issues/1))
4
56
 
5
57
  ## 0.3.0 - 2018-06-23
6
58
 
7
- - **breaking change**: rename the `default_separator` option to `default_delimiter`
8
- - to avoid ambiguity in the code, refer to the input pattern/string as the
9
- "delimiter" and the matched string as the "separator"
59
+ #### Breaking Changes
60
+
61
+ - rename the `default_separator` option `default_delimiter`
10
62
 
11
63
  ## 0.2.0 - 2018-06-22
12
64
 
13
- - **breaking change**: make `index` (AKA `offset`) 0-based and add `position`
14
- (AKA `pos`) as the 1-based accessor
65
+ #### Breaking Changes
66
+
67
+ - make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`) as the
68
+ 1-based accessor
15
69
 
16
70
  ## 0.1.0 - 2018-06-22
17
71
 
18
- - **breaking change**: the block now takes a single `split` object with an
19
- `index` accessor, rather than seperate `index` and `split` arguments
72
+ #### Breaking Changes
73
+
74
+ - the block now takes a single `split` object with an `index` accessor, rather
75
+ than separate `index` and `split` arguments
76
+
77
+ #### Features
78
+
20
79
  - add support for negative indices in the value supplied to the `at` option
21
80
  - add a `count` field to the split object containing the total number of splits
22
81
 
data/README.md CHANGED
@@ -3,14 +3,16 @@
3
3
  [![Build Status](https://travis-ci.org/chocolateboy/string_splitter.svg)](https://travis-ci.org/chocolateboy/string_splitter)
4
4
  [![Gem Version](https://img.shields.io/gem/v/string_splitter.svg)](https://rubygems.org/gems/string_splitter)
5
5
 
6
- <!-- START doctoc generated TOC please keep comment here to allow auto update -->
7
- <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
6
+ <!-- toc -->
8
7
 
9
8
  - [NAME](#name)
10
9
  - [INSTALLATION](#installation)
11
10
  - [SYNOPSIS](#synopsis)
12
11
  - [DESCRIPTION](#description)
13
12
  - [WHY?](#why)
13
+ - [CAVEATS](#caveats)
14
+ - [Differences from String#split](#differences-from-string%23split)
15
+ - [COMPATIBILITY](#compatibility)
14
16
  - [VERSION](#version)
15
17
  - [SEE ALSO](#see-also)
16
18
  - [Gems](#gems)
@@ -18,7 +20,7 @@
18
20
  - [AUTHOR](#author)
19
21
  - [COPYRIGHT AND LICENSE](#copyright-and-license)
20
22
 
21
- <!-- END doctoc generated TOC please keep comment here to allow auto update -->
23
+ <!-- tocstop -->
22
24
 
23
25
  # NAME
24
26
 
@@ -36,65 +38,128 @@ gem "string_splitter"
36
38
  require "string_splitter"
37
39
 
38
40
  ss = StringSplitter.new
41
+ ```
42
+
43
+ **Same as `String#split`**
39
44
 
40
- # same as String#split
41
- ss.split("foo bar baz quux")
42
- ss.split("foo bar baz quux", " ")
43
- ss.split("foo bar baz quux", /\s+/)
44
- # => ["foo", "bar", "baz", "quux"]
45
+ ```ruby
46
+ ss.split("foo bar baz")
47
+ ss.split("foo bar baz", " ")
48
+ ss.split("foo bar baz", /\s+/)
49
+ # => ["foo", "bar", "baz"]
50
+
51
+ ss.split("foo", "")
52
+ ss.split("foo", //)
53
+ # => ["f", "o", "o"]
54
+
55
+ ss.split("", "...")
56
+ ss.split("", /.../)
57
+ # => []
58
+ ```
45
59
 
46
- # split at the first delimiter
60
+ **Split at the first delimiter**
61
+
62
+ ```ruby
47
63
  ss.split("foo:bar:baz:quux", ":", at: 1)
64
+ ss.split("foo:bar:baz:quux", ":", select: 1)
48
65
  # => ["foo", "bar:baz:quux"]
66
+ ```
49
67
 
50
- # split at the last delimiter
68
+ **Split at the last delimiter**
69
+
70
+ ```ruby
51
71
  ss.split("foo:bar:baz:quux", ":", at: -1)
52
72
  # => ["foo:bar:baz", "quux"]
73
+ ```
74
+
75
+ **Split at multiple delimiter positions**
76
+
77
+ ```ruby
78
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
79
+ # => ["1", "2", "3", "4:5:6:7:8", "9"]
80
+ ```
53
81
 
54
- # split at multiple delimiter positions
55
- ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
56
- # => ["1", "2", "3", "4:5:6:7", "8:9"]
82
+ **Split at all but the first and last delimiters**
57
83
 
58
- # split from the right
84
+ ```ruby
85
+ ss.split("1:2:3:4:5:6", ":", except: [1, -1])
86
+ ss.split("1:2:3:4:5:6", ":", reject: [1, -1])
87
+ # => ["1:2", "3", "4", "5:6"]
88
+ ```
89
+
90
+ **Split from the right**
91
+
92
+ ```ruby
59
93
  ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
60
94
  # => ["1:2:3:4", "5:6", "7", "8", "9"]
95
+ ```
96
+
97
+ **Split with negative, descending, and infinite ranges**
98
+
99
+ ```ruby
100
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: ..-3)
101
+ # => ["1", "2", "3", "4", "5", "6", "7:8:9"]
102
+
103
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: 4...)
104
+ # => ["1:2:3:4", "5", "6", "7", "8:9"]
105
+
106
+ ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1, 5..3, -2..])
107
+ # => ["1", "2:3", "4", "5", "6:7", "8", "9"]
108
+ ```
109
+
110
+ **Full control via a block**
61
111
 
62
- # full control via a block
63
- result = ss.split('a:a:a:b:c:c:e:a:a:d:c', ":") do |split|
64
- split.index > 0 && split.lhs == split.rhs
112
+ ```ruby
113
+ result = ss.split("1:2:3:4:5:6:7:8", ":") do |split|
114
+ split.pos % 2 == 0
65
115
  end
66
- # => ["a:a", "a:b:c", "c:e:a", "a:d:c"]
116
+ # => ["1:2", "3:4", "5:6", "7:8"]
117
+ ```
118
+
119
+ ```ruby
120
+ string = "banana".chars.sort.join # "aaabnn"
121
+
122
+ ss.split(string, "") do |split|
123
+ split.rhs != split.lhs
124
+ end
125
+ # => ["aaa", "b", "nn"]
67
126
  ```
68
127
 
69
128
  # DESCRIPTION
70
129
 
71
- Many languages have built-in string `split` functions/methods. They behave similarly
72
- (notwithstanding the occasional [surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)),
73
- and handle a few common cases e.g.:
130
+ Many languages have built-in `split` functions/methods for strings. They behave
131
+ similarly (notwithstanding the occasional
132
+ [surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)), and
133
+ handle a few common cases e.g.:
74
134
 
75
135
  * limiting the number of splits
76
- * including the separators in the results
136
+ * including the separator(s) in the results
77
137
  * removing (some) empty fields
78
138
 
79
- But, because the API is squeezed into two overloaded parameters (the delimiter and the limit),
80
- achieving the desired effects can be tricky. For instance, while `String#split` removes empty
81
- trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
82
- cramped API means there's no way to e.g. combine a limit (positive integer) with the option
83
- to preserve empty fields (negative integer), or use backreferences in a delimiter pattern
139
+ But, because the API is squeezed into two overloaded parameters (the delimiter
140
+ and the limit), achieving the desired results can be tricky. For instance,
141
+ while `String#split` removes empty trailing fields (by default), it provides no
142
+ way to remove *all* empty fields. Likewise, the cramped API means there's no
143
+ way to e.g. combine a limit (positive integer) with the option to preserve
144
+ empty fields (negative integer), or use backreferences in a delimiter pattern
84
145
  without including its captured subexpressions in the result.
85
146
 
86
- If `split` was being written from scratch, without the baggage of its legacy API,
87
- it's possible that some of these options would be made explicit rather than overloading
88
- the parameters. And, indeed, this is possible in some implementations,
89
- e.g. in Crystal:
147
+ If `split` was being written from scratch, without the baggage of its legacy
148
+ API, it's possible that some of these options would be made explicit rather
149
+ than overloading the parameters. And, indeed, this is possible in some
150
+ implementations, e.g. in Crystal:
90
151
 
91
152
  ```ruby
92
- ":foo:bar:baz:".split(":", remove_empty: false) # => ["", "foo", "bar", "baz", ""]
93
- ":foo:bar:baz:".split(":", remove_empty: true) # => ["foo", "bar", "baz"]
153
+ ":foo:bar:baz:".split(":", remove_empty: false)
154
+ # => ["", "foo", "bar", "baz", ""]
155
+
156
+ ":foo:bar:baz:".split(":", remove_empty: true)
157
+ # => ["foo", "bar", "baz"]
94
158
  ````
95
159
 
96
- StringSplitter takes this one step further by moving the configuration out of the method altogether
97
- and delegating the strategy — i.e. which splits should be accepted or rejected — to a block:
160
+ StringSplitter takes this one step further by moving the configuration out of
161
+ the method altogether and delegating the strategy — i.e. which splits should be
162
+ accepted or rejected — to a block:
98
163
 
99
164
  ```ruby
100
165
  ss = StringSplitter.new
@@ -102,22 +167,32 @@ ss = StringSplitter.new
102
167
  ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
103
168
  # => ["foo", "bar:baz"]
104
169
 
105
- ss.split("foo:bar:baz", ":") { |split| split.position == split.count }
106
- # => ["foo:bar", "baz"]
170
+ ss.split("foo:bar:baz:quux", ":") do |split|
171
+ split.position == 1 || split.position == 3
172
+ end
173
+ # => ["foo", "bar:baz", "quux"]
107
174
  ```
108
175
 
109
- As a shortcut, the common case of splitting on delimiters at one or more positions is supported by an option:
176
+ As a shortcut, the common case of splitting (or not splitting) at one or more
177
+ positions is supported by dedicated options:
110
178
 
111
179
  ```ruby
112
- ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
180
+ ss.split("foo:bar:baz:quux", ":", select: [1, -1])
181
+ # => ["foo", "bar:baz", "quux"]
182
+
183
+ ss.split("foo:bar:baz:quux", ":", reject: [1, -1])
184
+ # => ["foo:bar", "baz:quux"]
113
185
  ```
114
186
 
115
187
  # WHY?
116
188
 
117
- I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
189
+ I wanted to split semi-structured output into fields without having to resort
190
+ to a regex or a full-blown parser.
118
191
 
119
- As an example, the nominally unstructured output of many Unix commands is often formatted in a way
120
- that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
192
+ As an example, the nominally unstructured output of many Unix commands is often
193
+ formatted in a way that's tantalizingly close to being
194
+ [machine-readable](https://en.wikipedia.org/wiki/Delimiter-separated_values),
195
+ apart from a few pesky exceptions e.g.:
121
196
 
122
197
  ```bash
123
198
  $ ls -l
@@ -129,8 +204,8 @@ drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
129
204
  -rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
130
205
  ```
131
206
 
132
- These lines can *almost* be parsed into an array of fields by splitting them on whitespace. The exception is the
133
- date (columns 6-8) i.e.:
207
+ These lines can *almost* be parsed into an array of fields by splitting them on
208
+ whitespace. The exception is the date (columns 6-8) i.e.:
134
209
 
135
210
  ```ruby
136
211
  line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
@@ -155,13 +230,14 @@ One way to work around this is to parse the whole line e.g.:
155
230
  line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
156
231
  ```
157
232
 
158
- But that requires us to specify *everything*. What we really want is a version of `split`
159
- which allows us to veto splitting for the 6th and 7th delimiters i.e. control over which
160
- splits are accepted, rather than being restricted to the single, baked-in strategy provided
161
- by the `limit` parameter.
233
+ But that requires us to specify *everything*. What we really want is a version
234
+ of `split` which allows us to veto splitting for the 6th and 7th delimiters
235
+ (and to stop after the 8th delimiter) i.e. control over which splits are
236
+ accepted, rather than being restricted to the single, baked-in strategy
237
+ provided by the `limit` parameter.
162
238
 
163
- By providing a simple way to accept or reject each split, StringSplitter makes cases like
164
- this easy to handle, either via a block:
239
+ By providing a simple way to accept or reject each split, StringSplitter makes
240
+ cases like this easy to handle, either via a block:
165
241
 
166
242
  ```ruby
167
243
  ss.split(line) do |split|
@@ -177,9 +253,51 @@ ss.split(line, at: [1..5, 8])
177
253
  # => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
178
254
  ```
179
255
 
256
+ # CAVEATS
257
+
258
+ ## Differences from String#split
259
+
260
+ Unlike `String#split`, StringSplitter doesn't trim the string before splitting
261
+ (with `String#strip`) if the delimiter is omitted or a single space, e.g.:
262
+
263
+ ```ruby
264
+ " foo bar baz ".split # => ["foo", "bar", "baz"]
265
+ " foo bar baz ".split(" ") # => ["foo", "bar", "baz"]
266
+
267
+ ss.split(" foo bar baz ") # => ["", "foo", "bar", "baz", ""]
268
+ ss.split(" foo bar baz ", " ") # => ["", "foo", "bar", "baz", ""]
269
+ ```
270
+
271
+ `String#split` omits the `nil` values of unmatched optional captures:
272
+
273
+ ```ruby
274
+ "foo:bar:baz".scan(/(:)|(-)/) # => [[":", nil], [":", nil]]
275
+ "foo:bar:baz".split(/(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
276
+ ```
277
+
278
+ StringSplitter preserves them (if `include_captures` is true, as it is by
279
+ default), though they can be omitted from spread captures by passing
280
+ `:compact` as the value of the `spread_captures` option:
281
+
282
+ ```ruby
283
+ s1 = StringSplitter.new(spread_captures: true)
284
+ s2 = StringSplitter.new(spread_captures: false)
285
+ s3 = StringSplitter.new(spread_captures: :compact)
286
+
287
+ s1.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", nil, "bar", ":", nil, "baz"]
288
+ s2.split("foo:bar:baz", /(:)|(-)/) # => ["foo", [":", nil], "bar", [":", nil], "baz"]
289
+ s3.split("foo:bar:baz", /(:)|(-)/) # => ["foo", ":", "bar", ":", "baz"]
290
+ ```
291
+
292
+ # COMPATIBILITY
293
+
294
+ StringSplitter is tested and supported on all versions of Ruby [supported by
295
+ the ruby-core team](https://www.ruby-lang.org/en/downloads/branches/), i.e.,
296
+ currently, Ruby 2.5 and above.
297
+
180
298
  # VERSION
181
299
 
182
- 0.3.1
300
+ 0.7.0
183
301
 
184
302
  # SEE ALSO
185
303
 
@@ -197,7 +315,7 @@ ss.split(line, at: [1..5, 8])
197
315
 
198
316
  # COPYRIGHT AND LICENSE
199
317
 
200
- Copyright © 2018 by chocolateboy.
318
+ Copyright © 2018-2020 by chocolateboy.
201
319
 
202
320
  This is free software; you can redistribute it and/or modify it under the
203
- terms of the [Artistic License 2.0](http://www.opensource.org/licenses/artistic-license-2.0.php).
321
+ terms of the [Artistic License 2.0](https://www.opensource.org/licenses/artistic-license-2.0.php).
@@ -1,250 +1,359 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'set'
3
4
  require 'values'
4
5
 
6
+ require_relative 'string_splitter/version'
7
+
5
8
  # This class extends the functionality of +String#split+ by:
6
9
  #
7
10
  # - providing full control over which splits are accepted or rejected
11
+ #
8
12
  # - adding support for splitting from right-to-left
9
- # - encapsulating splitting options/preferences in instances rather than trying to
10
- # cram them into overloaded method parameters
13
+ #
14
+ # - encapsulating splitting options/preferences in the splitter rather
15
+ # than trying to cram them into overloaded method parameters
11
16
  #
12
17
  # These enhancements allow splits to handle many cases that otherwise require bigger
13
- # guns e.g. regex matching or parsing.
18
+ # guns, e.g. regex matching or parsing.
19
+ #
20
+ # Implementation-wise, we split the string with a scanner which works in a similar
21
+ # way to +String#split+ and parse the resulting tokens into an array of Split objects
22
+ # with the following fields:
23
+ #
24
+ # - captures: separator substrings captured by parentheses in the delimiter pattern
25
+ # - count: the number of splits
26
+ # - index: the 0-based index of the split in the array
27
+ # - lhs: the string to the left of the separator (back to the previous split candidate)
28
+ # - position: the 1-based index of the split in the array (alias: pos)
29
+ # - rhs: the string to the right of the separator (up to the next split candidate)
30
+ # - rindex: the 0-based index of the split relative to the end of the array
31
+ # - rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
32
+ # - separator: the string matched by the delimiter pattern/string
33
+ #
14
34
  class StringSplitter
15
- ACCEPT = ->(_split) { true }
16
- DEFAULT_DELIMITER = /\s+/
17
- NO_SPLITS = []
35
+ # terminology: the delimiter is what we provide and the separators are what we get
36
+ # back (if we capture them). e.g. for:
37
+ #
38
+ # ss.split("foo:bar::baz", /(\W+)/)
39
+ #
40
+ # the delimiter is /(\W+)/ and the separators are ":" and "::"
41
+
42
+ ACCEPT_ALL = ->(_split) { true }
43
+ DEFAULT_DELIMITER = /\s+/.freeze
44
+ REMOVE = [].freeze
18
45
 
19
46
  Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
20
47
  def position
21
48
  index + 1
22
49
  end
23
50
 
24
- alias_method :offset, :index
25
51
  alias_method :pos, :position
52
+
53
+ # 0-based index relative to the end of the array, e.g. for 5 items:
54
+ #
55
+ # index | rindex
56
+ # ------|-------
57
+ # 0 | 4
58
+ # 1 | 3
59
+ # 2 | 2
60
+ # 3 | 1
61
+ # 4 | 0
62
+ def rindex
63
+ count - position
64
+ end
65
+
66
+ # 1-based position relative to the end of the array, e.g. for 5 items:
67
+ #
68
+ # position | rposition
69
+ # ----------|----------
70
+ # 1 | 5
71
+ # 2 | 4
72
+ # 3 | 3
73
+ # 4 | 2
74
+ # 5 | 1
75
+ def rposition
76
+ count + 1 - position
77
+ end
78
+
79
+ alias_method :rpos, :rposition
80
+ end
81
+
82
+ # simulate an enum. the value is returned by the case statement
83
+ # in the generated block if the positions match
84
+ module Action
85
+ SELECT = true
86
+ REJECT = false
26
87
  end
27
88
 
89
+ private_constant :Action
90
+
28
91
  def initialize(
29
92
  default_delimiter: DEFAULT_DELIMITER,
30
93
  include_captures: true,
31
- remove_empty: false,
94
+ remove_empty: false, # TODO remove this
95
+ remove_empty_fields: remove_empty,
32
96
  spread_captures: true
33
97
  )
34
98
  @default_delimiter = default_delimiter
35
99
  @include_captures = include_captures
36
- @remove_empty = remove_empty
100
+ @remove_empty_fields = remove_empty_fields
37
101
  @spread_captures = spread_captures
38
102
  end
39
103
 
40
- attr_reader :default_delimiter, :include_captures, :remove_empty, :spread_captures
41
-
42
- def split(string, delimiter = @default_delimiter, at: nil, &block)
43
- result, block, splits, count, index = split_common(string, delimiter, at, block)
104
+ attr_reader(
105
+ :default_delimiter,
106
+ :include_captures,
107
+ :remove_empty_fields,
108
+ :spread_captures
109
+ )
44
110
 
45
- splits.each do |split|
46
- split = Split.with(split.merge({ index: (index += 1), count: count }))
111
+ # TODO remove this
112
+ alias remove_empty remove_empty_fields
113
+
114
+ def split(
115
+ string,
116
+ delimiter = @default_delimiter,
117
+ at: nil, # alias for select
118
+ except: nil, # alias for reject
119
+ select: at,
120
+ reject: except,
121
+ &block
122
+ )
123
+ result, splits, count, accept = init(
124
+ string: string,
125
+ delimiter: delimiter,
126
+ select: select,
127
+ reject: reject,
128
+ block: block
129
+ )
130
+
131
+ return result unless splits
132
+
133
+ splits.each_with_index do |hash, index|
134
+ split = Split.with(hash.merge({ count: count, index: index }))
47
135
  result << split.lhs if result.empty?
48
136
 
49
- if block.call(split)
50
- if @include_captures
51
- if @spread_captures
52
- result += split.captures
53
- else
54
- result << split.captures
55
- end
56
- end
57
-
58
- result << split.rhs
137
+ if accept.call(split)
138
+ result << split.captures << split.rhs
59
139
  else
60
140
  # append the rhs
61
141
  result[-1] = result[-1] + split.separator + split.rhs
62
142
  end
63
143
  end
64
144
 
65
- result
145
+ render(result)
66
146
  end
67
147
 
68
148
  alias lsplit split
69
149
 
70
- def rsplit(string, delimiter = @default_delimiter, at: nil, &block)
71
- result, block, splits, count, index = split_common(string, delimiter, at, block)
72
-
73
- splits.reverse!.each do |split|
74
- split = Split.with(split.merge({ index: (index += 1), count: count }))
150
+ def rsplit(
151
+ string,
152
+ delimiter = @default_delimiter,
153
+ at: nil, # alias for select
154
+ except: nil, # alias for reject
155
+ select: at,
156
+ reject: except,
157
+ &block
158
+ )
159
+ result, splits, count, accept = init(
160
+ string: string,
161
+ delimiter: delimiter,
162
+ select: select,
163
+ reject: reject,
164
+ block: block
165
+ )
166
+
167
+ return result unless splits
168
+
169
+ splits.reverse_each.with_index do |hash, index|
170
+ split = Split.with(hash.merge({ count: count, index: index }))
75
171
  result.unshift(split.rhs) if result.empty?
76
172
 
77
- if block.call(split)
78
- if @include_captures
79
- if @spread_captures
80
- result = split.captures + result
81
- else
82
- result.unshift(split.captures)
83
- end
84
- end
85
-
86
- result.unshift(split.lhs)
173
+ if accept.call(split)
174
+ # [lhs + captures] + result
175
+ result.unshift(split.lhs, split.captures)
87
176
  else
88
177
  # prepend the lhs
89
178
  result[0] = split.lhs + split.separator + result[0]
90
179
  end
91
180
  end
92
181
 
93
- result
182
+ render(result)
94
183
  end
95
184
 
96
185
  private
97
186
 
98
- def splits_for(parts, ncaptures)
99
- result = []
100
- splits = []
101
-
102
- until parts.empty?
103
- lhs = parts.shift
104
- separator = parts.shift
105
- captures = parts.shift(ncaptures)
106
- rhs = parts.length == 1 ? parts.shift : parts.first
107
-
108
- if @remove_empty && (lhs.empty? || rhs.empty?)
109
- if lhs.empty? && rhs.empty?
110
- # do nothing
111
- elsif parts.empty? # last split
112
- result << (!lhs.empty? ? lhs : rhs) if splits.empty?
113
- elsif rhs.empty?
114
- # replace the empty rhs with the non-empty lhs
115
- parts[0] = lhs
116
- end
187
+ # initialisation common to +split+ and +rsplit+
188
+ #
189
+ # takes a hash of options passed to +split+ or +rsplit+ and returns a tuple with
190
+ # the following fields:
191
+ #
192
+ # - result: the array of separated strings to return from +split+ or +rsplit+.
193
+ # if the splits array is empty, the caller returns this array immediately
194
+ # without any further processing
195
+ #
196
+ # - splits: an array of hashes containing the lhs, rhs, separator and captured
197
+ # separator substrings for each split
198
+ #
199
+ # - count: the number of splits
200
+ #
201
+ # - accept: a proc whose return value determines whether each split should be
202
+ # accepted (true) or rejected (false)
203
+ #
204
+ def init(string:, delimiter:, select:, reject:, block:)
205
+ if reject
206
+ positions = reject
207
+ action = Action::REJECT
208
+ elsif select
209
+ positions = select
210
+ action = Action::SELECT
211
+ end
117
212
 
118
- next
119
- end
213
+ splits = parse(string, delimiter)
120
214
 
121
- splits << {
122
- lhs: lhs,
123
- rhs: rhs,
124
- separator: separator,
125
- captures: captures,
126
- }
215
+ if splits.empty?
216
+ result = string.empty? ? [] : [string]
217
+ return [result]
127
218
  end
128
219
 
129
- [result, splits]
220
+ block ||= positions ? compile(positions, action, splits.length) : ACCEPT_ALL
221
+ [[], splits, splits.length, block]
130
222
  end
131
223
 
132
- # setup common to both split methods
133
- def split_common(string, delimiter, at, block)
134
- unless (match = string.match(delimiter))
135
- result = (@remove_empty && string.empty?) ? [] : [string]
136
- return [result, block, NO_SPLITS, 0, -1]
224
+ def render(values)
225
+ values.flat_map do |value|
226
+ if value.is_a?(String)
227
+ value.empty? && @remove_empty_fields ? REMOVE : [value]
228
+ elsif @include_captures
229
+ if @spread_captures
230
+ @spread_captures == :compact ? value.compact : value
231
+ elsif value.empty?
232
+ # we expose non-captures (string delimiters or regexps with no
233
+ # captures) as empty arrays inside the block, so the type is
234
+ # consistent, but it doesn't make sense to keep them in the
235
+ # result
236
+ REMOVE
237
+ else
238
+ [value]
239
+ end
240
+ else
241
+ REMOVE
242
+ end
137
243
  end
138
-
139
- ncaptures = match.captures.length
140
- delimiter = increment_backrefs(delimiter, ncaptures)
141
- parts = string.split(/(#{delimiter})/, -1)
142
- remove_trailing_empty_field!(parts, ncaptures)
143
- result, splits = splits_for(parts, ncaptures)
144
- count = splits.length
145
- block ||= at ? match_positions(at, count) : ACCEPT
146
-
147
- [result, block, splits, count, -1]
148
244
  end
149
245
 
150
- # increment back-references so they remain valid when the outer capture
151
- # is added.
152
- #
153
- # e.g. to split on:
246
+ # takes a string and a delimiter pattern (regex or string) and splits it along
247
+ # the delimiter, returning an array of objects (hashes) representing each split.
248
+ # e.g. for:
154
249
  #
155
- # - <foo-comment> ... </foo-comment>
156
- # - <bar-comment> ... </bar-comment>
250
+ # parse("foo:bar:baz:quux", ":")
157
251
  #
158
- # etc.
252
+ # we return:
159
253
  #
160
- # before:
254
+ # [
255
+ # { lhs: "foo", rhs: "bar", separator: ":", captures: [] },
256
+ # { lhs: "bar", rhs: "baz", separator: ":", captures: [] },
257
+ # { lhs: "baz", rhs: "quux", separator: ":", captures: [] },
258
+ # ]
161
259
  #
162
- # %r| <(\w+-comment)> [^<]* </\1> |x
163
- #
164
- # after:
165
- #
166
- # %r| ( <(\w+-comment)> [^<]* </\2> ) |x
260
+ def parse(string, delimiter)
261
+ result = []
262
+ start = 0
167
263
 
168
- def increment_backrefs(delimiter, ncaptures)
169
- if delimiter.is_a?(Regexp) && ncaptures > 0
170
- delimiter = delimiter.to_s.gsub(/\\(?:(\d+)|.)/) do
171
- match = Regexp.last_match
172
- match[1] ? '\\' + match[1].to_i.next.to_s : match[0]
173
- end
264
+ # we don't use the argument passed to the +scan+ block here because it's a
265
+ # string (the separator) if there are no captures, rather than an empty
266
+ # array. we use match.captures instead to get the array
267
+ string.scan(delimiter) do
268
+ match = Regexp.last_match
269
+ index, after = match.offset(0)
270
+ separator = match[0]
271
+
272
+ # ignore empty separators at the beginning and/or end of the string
273
+ next if separator.empty? && (index.zero? || after == string.length)
274
+
275
+ lhs = string.slice(start, index - start)
276
+ result.last[:rhs] = lhs unless result.empty?
277
+
278
+ # this is correct for the last/only match, but gets updated to the next
279
+ # match's lhs for other matches
280
+ rhs = match.post_match
281
+
282
+ result << {
283
+ captures: match.captures,
284
+ lhs: lhs,
285
+ rhs: rhs,
286
+ separator: separator,
287
+ }
288
+
289
+ # move the start index (the start of the next lhs) to the index after the
290
+ # last character of the separator
291
+ start = after
174
292
  end
175
293
 
176
- delimiter
294
+ result
177
295
  end
178
296
 
179
- # work around Ruby's (and Perl's and Groovy's) unhelpful behavior when splitting
180
- # on an empty string/pattern without removing trailing empty fields e.g.:
297
+ # returns a lambda which splits at (i.e. accepts or rejects splits at, depending
298
+ # on the action) the supplied positions
181
299
  #
182
- # "foobar".split("", -1)
183
- # "foobar".split(//, -1)
184
- # # => ["f", "o", "o", "b", "a", "r", ""]
300
+ # positions are preprocessed to support additional features: negative
301
+ # ranges, infinite ranges, and descending ranges, e.g.:
185
302
  #
186
- # "foobar".split(/()/, -1)
187
- # # => ["f", "", "o", "", "o", "", "b", "", "a", "", "r", "", ""]
303
+ # ss.split("foo:bar:baz:quux", ":", at: -1)
188
304
  #
189
- # "foobar".split(/(())/, -1)
190
- # # => ["f", "", "", "o", "", "", "o", "", "", "b", "", "", "a", "", "", "r", "", "", ""]
305
+ # translates to:
191
306
  #
192
- # *there is no such thing as an empty field whose separator is empty*, so
193
- # if String#split's result ends with an empty separator, 0 or more (empty)
194
- # captures and an empty field, we can safely remove them.
195
-
196
- def remove_trailing_empty_field!(parts, ncaptures)
197
- # the trailing field is at index -1. if there are 0 captures, the separator
198
- # is at -2:
199
- #
200
- # [empty_separator, empty_field]
201
- #
202
- # if there is 1 capture, the separator is at -3:
203
- #
204
- # [empty_separator, capture, empty_field]
307
+ # ss.split("foo:bar:baz:quux", ":", at: 3)
308
+ #
309
+ # and
310
+ #
311
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..)
312
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: -3..-1)
313
+ #
314
+ # translate to:
315
+ #
316
+ # ss.split("1:2:3:4:5:6:7:8:9", ":", at: 6..8)
317
+ #
318
+ def compile(positions, action, count)
319
+ # XXX note: we don't use modulo, because we don't want
320
+ # out-of-bounds indices to silently work, e.g. we don't want:
205
321
  #
206
- # etc. therefore we find the separator by walking back
322
+ # ss.split("foo:bar:baz:quux", ":", at: -42)
207
323
  #
208
- # 1 (empty field)
209
- # + ncaptures
210
- # + 1 (separator)
324
+ # to mysteriously match when the index/position is 0/1
211
325
  #
212
- # steps from the end of the array i.e. ncaptures + 2
213
- count = ncaptures + 2
214
- separator_index = count * -1
215
-
216
- return unless parts[-1].empty? && parts[separator_index].empty?
217
-
218
- # drop the empty separator, the (empty) captures, and the trailing empty field
219
- parts.pop(count)
220
- end
221
-
222
- def match_positions(positions, nsplits)
223
- positions = Array(positions).map do |position|
224
- if position.is_a?(Integer) && position.negative?
225
- # translate negative indices to 1-based non-negative indices e.g:
226
- #
227
- # ss.split("foo:bar:baz:quux", ":", at: -1)
228
- #
229
- # translates to:
230
- #
231
- # ss.split("foo:bar:baz:quux", ":", at: 3)
232
- #
233
- # XXX note: we don't use modulo, because we don't want
234
- # out-of-bounds indices to silently work e.g. we don't want:
235
- #
236
- # ss.split("foo:bar:baz:quux", ":", -42)
237
- #
238
- # to mysteriously match when the position is 2
239
-
240
- nsplits + 1 + position
326
+ resolve = ->(int) { int.negative? ? count + 1 + int : int }
327
+
328
+ # don't use Array(...) to wrap these as we don't want to convert ranges
329
+ positions = positions.is_a?(Array) ? positions : [positions]
330
+
331
+ positions = positions.map do |position|
332
+ if position.is_a?(Integer)
333
+ resolve[position]
334
+ elsif position.is_a?(Range)
335
+ rbegin = position.begin
336
+ rend = position.end
337
+ rexc = position.exclude_end?
338
+
339
+ if rbegin.nil?
340
+ Range.new(1, resolve[rend], rexc)
341
+ elsif rend.nil?
342
+ Range.new(resolve[rbegin], count, rexc)
343
+ elsif rbegin.negative? || rend.negative? || (rend - rbegin).negative?
344
+ from = resolve[rbegin]
345
+ to = resolve[rend]
346
+ to < from ? Range.new(to, from, rexc) : Range.new(from, to, rexc)
347
+ else
348
+ position
349
+ end
350
+ elsif position.is_a?(Set)
351
+ position.map { |it| resolve[it] }.to_set
241
352
  else
242
353
  position
243
354
  end
244
355
  end
245
356
 
246
- lambda do |split|
247
- case split.position when *positions then true else false end
248
- end
357
+ ->(split) { case split.position when *positions then action else !action end }
249
358
  end
250
359
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class StringSplitter
4
- VERSION = '0.3.1'
4
+ VERSION = '0.7.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_splitter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - chocolateboy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-06-24 00:00:00.000000000 Z
11
+ date: 2020-08-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: values
@@ -30,42 +30,42 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.16'
33
+ version: '2.1'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.16'
40
+ version: '2.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: minitest
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '5.11'
47
+ version: '5.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '5.11'
54
+ version: '5.0'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: minitest-power_assert
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.3.0
61
+ version: '0.3'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.3.0
68
+ version: '0.3'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: minitest-reporters
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -86,29 +86,15 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '10.0'
89
+ version: '13.0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '10.0'
97
- - !ruby/object:Gem::Dependency
98
- name: rubocop
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: 0.54.0
104
- type: :development
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: 0.54.0
111
- description:
96
+ version: '13.0'
97
+ description:
112
98
  email: chocolate@cpan.org
113
99
  executables: []
114
100
  extensions: []
@@ -127,7 +113,7 @@ metadata:
127
113
  bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
128
114
  changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
129
115
  source_code_uri: https://github.com/chocolateboy/string_splitter
130
- post_install_message:
116
+ post_install_message:
131
117
  rdoc_options: []
132
118
  require_paths:
133
119
  - lib
@@ -135,16 +121,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
135
121
  requirements:
136
122
  - - ">="
137
123
  - !ruby/object:Gem::Version
138
- version: '0'
124
+ version: '2.3'
139
125
  required_rubygems_version: !ruby/object:Gem::Requirement
140
126
  requirements:
141
127
  - - ">="
142
128
  - !ruby/object:Gem::Version
143
129
  version: '0'
144
130
  requirements: []
145
- rubyforge_project:
146
- rubygems_version: 2.7.7
147
- signing_key:
131
+ rubygems_version: 3.1.4
132
+ signing_key:
148
133
  specification_version: 4
149
134
  summary: String#split on steroids
150
135
  test_files: []