string_splitter 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +43 -28
- data/lib/string_splitter.rb +96 -83
- data/lib/string_splitter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e22243b9c975e4ac2ffa8f03871f5c41fd406cb3b4b780dd303e89bcf024c45
|
4
|
+
data.tar.gz: fced8a0defba0a46d1dde5ffef3c7ff9a93c4f43afa8d0b19c98d93524c466f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d7991eb02cea9c35a26e9248c33ff923b71637b476110f58d4b35abae398efa5603eca6d0ed178d797a69d79f6b1caa1468ec4753e185d60280ccfe4049bc1a
|
7
|
+
data.tar.gz: a944e39f2105d61585ca703073dcedd3e4b4e2be9dd88db01bfef07ab67bb265b15741d3b8a558c505525e06f800385f857a81945f527256102fdcef3084ee89
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.1.0 - 2018-06-22
|
2
|
+
|
3
|
+
- **breaking change**: the block now takes a single `split` object with an `index`
|
4
|
+
field, rather than separate `index` and `split` arguments
|
5
|
+
- add support for negative indices in the value supplied to the `at` option
|
6
|
+
- add a `count` field to the split object containing the total number of splits
|
7
|
+
|
1
8
|
## 0.0.1 - 2018-06-21
|
2
9
|
|
3
|
-
|
10
|
+
- initial release
|
data/README.md
CHANGED
@@ -39,6 +39,8 @@ ss = StringSplitter.new
|
|
39
39
|
|
40
40
|
# same as String#split
|
41
41
|
ss.split("foo bar baz quux")
|
42
|
+
ss.split("foo bar baz quux", " ")
|
43
|
+
ss.split("foo bar baz quux", /\s+/)
|
42
44
|
# => ["foo", "bar", "baz", "quux"]
|
43
45
|
|
44
46
|
# split on the first separator
|
@@ -46,20 +48,22 @@ ss.split("foo:bar:baz:quux", ":", at: 1)
|
|
46
48
|
# => ["foo", "bar:baz:quux"]
|
47
49
|
|
48
50
|
# split on the last separator
|
49
|
-
ss.
|
51
|
+
ss.split("foo:bar:baz:quux", ":", at: -1)
|
50
52
|
# => ["foo:bar:baz", "quux"]
|
51
53
|
|
52
|
-
# split on
|
53
|
-
|
54
|
-
|
55
|
-
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
54
|
+
# split on multiple separator indices
|
55
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
56
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
56
57
|
|
57
|
-
#
|
58
|
-
ss.
|
59
|
-
|
60
|
-
end
|
61
|
-
# => ["foo:bar:baz", "baz"]
|
58
|
+
# split from the right
|
59
|
+
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
60
|
+
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
62
61
|
|
62
|
+
# full control via a block
|
63
|
+
result = ss.split('a:a:a:b:c:c:e:a:a:d:c', ":") do |split|
|
64
|
+
split.index > 1 && split.lhs == split.rhs
|
65
|
+
end
|
66
|
+
# => ["a:a", "a:b:c", "c:e:a", "a:d:c"]
|
63
67
|
```
|
64
68
|
|
65
69
|
# DESCRIPTION
|
@@ -70,17 +74,18 @@ and handle a few common cases e.g.:
|
|
70
74
|
|
71
75
|
* limiting the number of splits
|
72
76
|
* including the separators in the results
|
73
|
-
* removing (some) empty
|
77
|
+
* removing (some) empty fields
|
74
78
|
|
75
79
|
But, because the API is squeezed into two overloaded parameters (the separator and the limit),
|
76
80
|
achieving the desired effects can be tricky. For instance, while `String#split` removes empty
|
77
|
-
trailing
|
81
|
+
trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
|
78
82
|
cramped API means there's no way to combine e.g. a limit (positive integer) with the option
|
79
|
-
to preserve empty
|
83
|
+
to preserve empty fields (negative integer).
|
80
84
|
|
81
85
|
If `split` was being written from scratch, without the baggage of its legacy API,
|
82
86
|
it's possible that some of these options would be made explicit rather than overloading
|
83
|
-
the
|
87
|
+
the parameters. And, indeed, this is possible in some implementations,
|
88
|
+
e.g. in Crystal:
|
84
89
|
|
85
90
|
```ruby
|
86
91
|
":foo:bar:baz:".split(":", remove_empty: false) # => ["", "foo", "bar", "baz", ""]
|
@@ -93,23 +98,25 @@ and delegating the strategy — i.e. which splits should be accepted or rejected
|
|
93
98
|
```ruby
|
94
99
|
ss = StringSplitter.new
|
95
100
|
|
96
|
-
ss.split("foo:bar:baz", ":")
|
97
|
-
|
101
|
+
ss.split("foo:bar:baz", ":") { |split| split.index == 1 }
|
102
|
+
# => ["foo", "bar:baz"]
|
103
|
+
|
104
|
+
ss.split("foo:bar:baz", ":") { |split| split.index == split.count }
|
105
|
+
# => ["foo:bar", "baz"]
|
98
106
|
```
|
99
107
|
|
100
|
-
As a shortcut, the common case of splitting at one or more indices
|
108
|
+
As a shortcut, the common case of splitting on separators at one or more indices is supported by an option:
|
101
109
|
|
102
110
|
```ruby
|
103
|
-
ss.split('foo:bar:baz:quux', ':', at: [1,
|
111
|
+
ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
|
104
112
|
```
|
105
113
|
|
106
114
|
# WHY?
|
107
115
|
|
108
116
|
I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
|
109
117
|
|
110
|
-
As an example, the nominally unstructured
|
111
|
-
|
112
|
-
exceptions e.g.:
|
118
|
+
As an example, the nominally unstructured output of many Unix commands is often, in practice, formatted in a way
|
119
|
+
that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
|
113
120
|
|
114
121
|
```bash
|
115
122
|
$ ls -la
|
@@ -148,22 +155,30 @@ line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+
|
|
148
155
|
```
|
149
156
|
|
150
157
|
But that requires us to specify *everything*. What we really want is a version of `split`
|
151
|
-
|
152
|
-
are accepted, rather than being restricted to the single, baked-in strategy
|
153
|
-
the `limit` parameter.
|
158
|
+
which allows us to veto splitting for the 6th and 7th separators i.e. control over which
|
159
|
+
splits are accepted, rather than being restricted to the single, baked-in strategy provided
|
160
|
+
by the `limit` parameter.
|
154
161
|
|
155
|
-
|
156
|
-
|
162
|
+
By providing a simple way to accept or reject each split, StringSplitter makes cases like
|
163
|
+
this easy to handle, either via a block:
|
157
164
|
|
158
165
|
```ruby
|
159
|
-
ss.split(line
|
166
|
+
ss.split(line) do |split|
|
167
|
+
case split.index when 1..5, 8 then true end
|
168
|
+
end
|
169
|
+
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
170
|
+
```
|
160
171
|
|
172
|
+
Or via its option shortcut:
|
173
|
+
|
174
|
+
```ruby
|
175
|
+
ss.split(line, at: [1..5, 8])
|
161
176
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
162
177
|
```
|
163
178
|
|
164
179
|
# VERSION
|
165
180
|
|
166
|
-
0.0
|
181
|
+
0.1.0
|
167
182
|
|
168
183
|
# SEE ALSO
|
169
184
|
|
data/lib/string_splitter.rb
CHANGED
@@ -7,38 +7,39 @@ require 'values'
|
|
7
7
|
# - providing full control over which splits are accepted or rejected
|
8
8
|
# - adding support for splitting from right-to-left
|
9
9
|
# - encapsulating splitting options/preferences in instances rather than trying to
|
10
|
-
# cram them
|
10
|
+
# cram them into overloaded method parameters
|
11
11
|
#
|
12
12
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
13
13
|
# guns e.g. regex matching or parsing.
|
14
14
|
class StringSplitter
|
15
|
-
ACCEPT = ->(
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
ACCEPT = ->(_split) { true }
|
16
|
+
DEFAULT_SEPARATOR = /\s+/
|
17
|
+
NO_SPLITS = []
|
18
|
+
|
19
|
+
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator)
|
20
|
+
|
21
|
+
def initialize(
|
22
|
+
default_separator: DEFAULT_SEPARATOR,
|
23
|
+
include_captures: true,
|
24
|
+
remove_empty: false,
|
25
|
+
spread_captures: true
|
26
|
+
)
|
27
|
+
@default_separator = default_separator
|
21
28
|
@include_captures = include_captures
|
22
29
|
@remove_empty = remove_empty
|
23
30
|
@spread_captures = spread_captures
|
24
31
|
end
|
25
32
|
|
26
|
-
|
27
|
-
result, block, iterator, index = split_common(string, delimiter, at, block, :forward)
|
28
|
-
|
29
|
-
return result unless iterator
|
33
|
+
attr_reader :default_separator, :include_captures, :remove_empty, :spread_captures
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
if result.empty?
|
35
|
-
next if @remove_empty && split.lhs.empty?
|
36
|
-
result << split.lhs
|
37
|
-
end
|
35
|
+
def split(string, delimiter = @default_separator, at: nil, &block)
|
36
|
+
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
38
37
|
|
39
|
-
|
38
|
+
splits.each do |split|
|
39
|
+
split = Split.with(split.merge({ index: (index += 1), count: count }))
|
40
|
+
result << split.lhs if result.empty?
|
40
41
|
|
41
|
-
if block.call(
|
42
|
+
if block.call(split)
|
42
43
|
if @include_captures
|
43
44
|
if @spread_captures
|
44
45
|
result += split.captures
|
@@ -59,22 +60,14 @@ class StringSplitter
|
|
59
60
|
|
60
61
|
alias lsplit split
|
61
62
|
|
62
|
-
def rsplit(string, delimiter =
|
63
|
-
result, block,
|
64
|
-
|
65
|
-
return result unless iterator
|
66
|
-
|
67
|
-
iterator.each do |split|
|
68
|
-
next if @remove_empty && split.lhs.empty?
|
69
|
-
|
70
|
-
if result.empty?
|
71
|
-
next if @remove_empty && split.rhs.empty?
|
72
|
-
result.unshift(split.rhs)
|
73
|
-
end
|
63
|
+
def rsplit(string, delimiter = @default_separator, at: nil, &block)
|
64
|
+
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
74
65
|
|
75
|
-
|
66
|
+
splits.reverse!.each do |split|
|
67
|
+
split = Split.with(split.merge({ index: (index += 1), count: count }))
|
68
|
+
result.unshift(split.rhs) if result.empty?
|
76
69
|
|
77
|
-
if block.call(
|
70
|
+
if block.call(split)
|
78
71
|
if @include_captures
|
79
72
|
if @spread_captures
|
80
73
|
result = split.captures + result
|
@@ -95,61 +88,45 @@ class StringSplitter
|
|
95
88
|
|
96
89
|
private
|
97
90
|
|
98
|
-
def
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
91
|
+
def splits_for(parts, ncaptures)
|
92
|
+
result = []
|
93
|
+
splits = []
|
94
|
+
|
95
|
+
until parts.empty?
|
96
|
+
lhs = parts.shift
|
97
|
+
separator = parts.shift
|
98
|
+
captures = parts.shift(ncaptures)
|
99
|
+
rhs = parts.length == 1 ? parts.shift : parts.first
|
100
|
+
|
101
|
+
if @remove_empty && (lhs.empty? || rhs.empty?)
|
102
|
+
if lhs.empty? && rhs.empty?
|
103
|
+
# do nothing
|
104
|
+
elsif parts.empty? # last split
|
105
|
+
result << (!lhs.empty? ? lhs : rhs) if splits.empty?
|
106
|
+
elsif !lhs.empty?
|
107
|
+
# replace the empty rhs with the non-empty lhs
|
108
|
+
parts[0] = lhs
|
109
|
+
end
|
117
110
|
|
118
|
-
|
119
|
-
parts = parts.dup
|
120
|
-
|
121
|
-
Enumerator.new do |yielder|
|
122
|
-
until parts.empty?
|
123
|
-
rhs = parts.pop
|
124
|
-
captures = parts.pop(ncaptures)
|
125
|
-
separator = parts.pop
|
126
|
-
lhs = parts.length == 1 ? parts.pop : parts.last
|
127
|
-
|
128
|
-
yielder << Split.with({
|
129
|
-
lhs: lhs,
|
130
|
-
rhs: rhs,
|
131
|
-
separator: separator,
|
132
|
-
captures: captures,
|
133
|
-
})
|
111
|
+
next
|
134
112
|
end
|
113
|
+
|
114
|
+
splits << {
|
115
|
+
lhs: lhs,
|
116
|
+
rhs: rhs,
|
117
|
+
separator: separator,
|
118
|
+
captures: captures,
|
119
|
+
}
|
135
120
|
end
|
121
|
+
|
122
|
+
[result, splits]
|
136
123
|
end
|
137
124
|
|
138
125
|
# setup common to both split methods
|
139
|
-
def split_common(string, delimiter, at, block
|
126
|
+
def split_common(string, delimiter, at, block)
|
140
127
|
unless (match = string.match(delimiter))
|
141
128
|
result = (@remove_empty && string.empty?) ? [] : [string]
|
142
|
-
return [result]
|
143
|
-
end
|
144
|
-
|
145
|
-
unless block
|
146
|
-
if at
|
147
|
-
block = lambda do |index, _split|
|
148
|
-
case index when *at then true else false end
|
149
|
-
end
|
150
|
-
else
|
151
|
-
block = ACCEPT
|
152
|
-
end
|
129
|
+
return [result, block, NO_SPLITS, 0, 0]
|
153
130
|
end
|
154
131
|
|
155
132
|
ncaptures = match.captures.length
|
@@ -178,7 +155,43 @@ class StringSplitter
|
|
178
155
|
end
|
179
156
|
|
180
157
|
parts = string.split(/(#{delimiter})/, -1)
|
181
|
-
|
182
|
-
|
158
|
+
result, splits = splits_for(parts, ncaptures)
|
159
|
+
count = splits.length
|
160
|
+
|
161
|
+
unless block
|
162
|
+
if at
|
163
|
+
at = Array(at).map do |index|
|
164
|
+
if index.is_a?(Integer) && index.negative?
|
165
|
+
# translate 1-based negative indices to 1-based positive
|
166
|
+
# indices e.g:
|
167
|
+
#
|
168
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
169
|
+
#
|
170
|
+
# translates to:
|
171
|
+
#
|
172
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
173
|
+
#
|
174
|
+
# XXX note: we don't use modulo, because we don't want
|
175
|
+
# out-of-bounds indices to silently work e.g. we don't want:
|
176
|
+
#
|
177
|
+
#   ss.split("foo:bar:baz:quux", ":", at: -42)
|
178
|
+
#
|
179
|
+
# to mysteriously match when the index is 2
|
180
|
+
|
181
|
+
count + 1 + index
|
182
|
+
else
|
183
|
+
index
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
block = lambda do |split|
|
188
|
+
case split.index when *at then true else false end
|
189
|
+
end
|
190
|
+
else
|
191
|
+
block = ACCEPT
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
[result, block, splits, count, 0]
|
183
196
|
end
|
184
197
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|