string_splitter 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +43 -28
- data/lib/string_splitter.rb +96 -83
- data/lib/string_splitter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3e22243b9c975e4ac2ffa8f03871f5c41fd406cb3b4b780dd303e89bcf024c45
|
4
|
+
data.tar.gz: fced8a0defba0a46d1dde5ffef3c7ff9a93c4f43afa8d0b19c98d93524c466f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d7991eb02cea9c35a26e9248c33ff923b71637b476110f58d4b35abae398efa5603eca6d0ed178d797a69d79f6b1caa1468ec4753e185d60280ccfe4049bc1a
|
7
|
+
data.tar.gz: a944e39f2105d61585ca703073dcedd3e4b4e2be9dd88db01bfef07ab67bb265b15741d3b8a558c505525e06f800385f857a81945f527256102fdcef3084ee89
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
## 0.1.0 - 2018-06-22
|
2
|
+
|
3
|
+
- **breaking change**: the block now takes a single `split` object with an `index`
|
4
|
+
field, rather than separate `index` and `split` arguments
|
5
|
+
- add support for negative indices in the value supplied to the `at` option
|
6
|
+
- add a `count` field to the split object containing the total number of splits
|
7
|
+
|
1
8
|
## 0.0.1 - 2018-06-21
|
2
9
|
|
3
|
-
|
10
|
+
- initial release
|
data/README.md
CHANGED
@@ -39,6 +39,8 @@ ss = StringSplitter.new
|
|
39
39
|
|
40
40
|
# same as String#split
|
41
41
|
ss.split("foo bar baz quux")
|
42
|
+
ss.split("foo bar baz quux", " ")
|
43
|
+
ss.split("foo bar baz quux", /\s+/)
|
42
44
|
# => ["foo", "bar", "baz", "quux"]
|
43
45
|
|
44
46
|
# split on the first separator
|
@@ -46,20 +48,22 @@ ss.split("foo:bar:baz:quux", ":", at: 1)
|
|
46
48
|
# => ["foo", "bar:baz:quux"]
|
47
49
|
|
48
50
|
# split on the last separator
|
49
|
-
ss.
|
51
|
+
ss.split("foo:bar:baz:quux", ":", at: -1)
|
50
52
|
# => ["foo:bar:baz", "quux"]
|
51
53
|
|
52
|
-
# split on
|
53
|
-
|
54
|
-
|
55
|
-
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
54
|
+
# split on multiple separator indices
|
55
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -1])
|
56
|
+
# => ["1", "2", "3", "4:5:6:7:8", "9"]
|
56
57
|
|
57
|
-
#
|
58
|
-
ss.
|
59
|
-
|
60
|
-
end
|
61
|
-
# => ["foo:bar:baz", "baz"]
|
58
|
+
# split from the right
|
59
|
+
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
60
|
+
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
62
61
|
|
62
|
+
# full control via a block
|
63
|
+
result = ss.split('a:a:a:b:c:c:e:a:a:d:c', ":") do |split|
|
64
|
+
split.index > 1 && split.lhs == split.rhs
|
65
|
+
end
|
66
|
+
# => ["a:a", "a:b:c", "c:e:a", "a:d:c"]
|
63
67
|
```
|
64
68
|
|
65
69
|
# DESCRIPTION
|
@@ -70,17 +74,18 @@ and handle a few common cases e.g.:
|
|
70
74
|
|
71
75
|
* limiting the number of splits
|
72
76
|
* including the separators in the results
|
73
|
-
* removing (some) empty
|
77
|
+
* removing (some) empty fields
|
74
78
|
|
75
79
|
But, because the API is squeezed into two overloaded parameters (the separator and the limit),
|
76
80
|
achieving the desired effects can be tricky. For instance, while `String#split` removes empty
|
77
|
-
trailing
|
81
|
+
trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
|
78
82
|
cramped API means there's no way to combine e.g. a limit (positive integer) with the option
|
79
|
-
to preserve empty
|
83
|
+
to preserve empty fields (negative integer).
|
80
84
|
|
81
85
|
If `split` was being written from scratch, without the baggage of its legacy API,
|
82
86
|
it's possible that some of these options would be made explicit rather than overloading
|
83
|
-
the
|
87
|
+
the parameters. And, indeed, this is possible in some implementations,
|
88
|
+
e.g. in Crystal:
|
84
89
|
|
85
90
|
```ruby
|
86
91
|
":foo:bar:baz:".split(":", remove_empty: false) # => ["", "foo", "bar", "baz", ""]
|
@@ -93,23 +98,25 @@ and delegating the strategy — i.e. which splits should be accepted or rejected
|
|
93
98
|
```ruby
|
94
99
|
ss = StringSplitter.new
|
95
100
|
|
96
|
-
ss.split("foo:bar:baz", ":")
|
97
|
-
|
101
|
+
ss.split("foo:bar:baz", ":") { |split| split.index == 1 }
|
102
|
+
# => ["foo", "bar:baz"]
|
103
|
+
|
104
|
+
ss.split("foo:bar:baz", ":") { |split| split.index == split.count }
|
105
|
+
# => ["foo:bar", "baz"]
|
98
106
|
```
|
99
107
|
|
100
|
-
As a shortcut, the common case of splitting at one or more indices
|
108
|
+
As a shortcut, the common case of splitting on separators at one or more indices is supported by an option:
|
101
109
|
|
102
110
|
```ruby
|
103
|
-
ss.split('foo:bar:baz:quux', ':', at: [1,
|
111
|
+
ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
|
104
112
|
```
|
105
113
|
|
106
114
|
# WHY?
|
107
115
|
|
108
116
|
I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
|
109
117
|
|
110
|
-
As an example, the nominally unstructured
|
111
|
-
|
112
|
-
exceptions e.g.:
|
118
|
+
As an example, the nominally unstructured output of many Unix commands is often, in practice, formatted in a way
|
119
|
+
that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
|
113
120
|
|
114
121
|
```bash
|
115
122
|
$ ls -la
|
@@ -148,22 +155,30 @@ line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+
|
|
148
155
|
```
|
149
156
|
|
150
157
|
But that requires us to specify *everything*. What we really want is a version of `split`
|
151
|
-
|
152
|
-
are accepted, rather than being restricted to the single, baked-in strategy
|
153
|
-
the `limit` parameter.
|
158
|
+
which allows us to veto splitting for the 6th and 7th separators i.e. control over which
|
159
|
+
splits are accepted, rather than being restricted to the single, baked-in strategy provided
|
160
|
+
by the `limit` parameter.
|
154
161
|
|
155
|
-
|
156
|
-
|
162
|
+
By providing a simple way to accept or reject each split, StringSplitter makes cases like
|
163
|
+
this easy to handle, either via a block:
|
157
164
|
|
158
165
|
```ruby
|
159
|
-
ss.split(line
|
166
|
+
ss.split(line) do |split|
|
167
|
+
case split.index when 1..5, 8 then true end
|
168
|
+
end
|
169
|
+
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
170
|
+
```
|
160
171
|
|
172
|
+
Or via its option shortcut:
|
173
|
+
|
174
|
+
```ruby
|
175
|
+
ss.split(line, at: [1..5, 8])
|
161
176
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
162
177
|
```
|
163
178
|
|
164
179
|
# VERSION
|
165
180
|
|
166
|
-
0.0
|
181
|
+
0.1.0
|
167
182
|
|
168
183
|
# SEE ALSO
|
169
184
|
|
data/lib/string_splitter.rb
CHANGED
@@ -7,38 +7,39 @@ require 'values'
|
|
7
7
|
# - providing full control over which splits are accepted or rejected
|
8
8
|
# - adding support for splitting from right-to-left
|
9
9
|
# - encapsulating splitting options/preferences in instances rather than trying to
|
10
|
-
# cram them
|
10
|
+
# cram them into overloaded method parameters
|
11
11
|
#
|
12
12
|
# These enhancements allow splits to handle many cases that otherwise require bigger
|
13
13
|
# guns e.g. regex matching or parsing.
|
14
14
|
class StringSplitter
|
15
|
-
ACCEPT = ->(
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
ACCEPT = ->(_split) { true }
|
16
|
+
DEFAULT_SEPARATOR = /\s+/
|
17
|
+
NO_SPLITS = []
|
18
|
+
|
19
|
+
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator)
|
20
|
+
|
21
|
+
def initialize(
|
22
|
+
default_separator: DEFAULT_SEPARATOR,
|
23
|
+
include_captures: true,
|
24
|
+
remove_empty: false,
|
25
|
+
spread_captures: true
|
26
|
+
)
|
27
|
+
@default_separator = default_separator
|
21
28
|
@include_captures = include_captures
|
22
29
|
@remove_empty = remove_empty
|
23
30
|
@spread_captures = spread_captures
|
24
31
|
end
|
25
32
|
|
26
|
-
|
27
|
-
result, block, iterator, index = split_common(string, delimiter, at, block, :forward)
|
28
|
-
|
29
|
-
return result unless iterator
|
33
|
+
attr_reader :default_separator, :include_captures, :remove_empty, :spread_captures
|
30
34
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
if result.empty?
|
35
|
-
next if @remove_empty && split.lhs.empty?
|
36
|
-
result << split.lhs
|
37
|
-
end
|
35
|
+
def split(string, delimiter = @default_separator, at: nil, &block)
|
36
|
+
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
38
37
|
|
39
|
-
|
38
|
+
splits.each do |split|
|
39
|
+
split = Split.with(split.merge({ index: (index += 1), count: count }))
|
40
|
+
result << split.lhs if result.empty?
|
40
41
|
|
41
|
-
if block.call(
|
42
|
+
if block.call(split)
|
42
43
|
if @include_captures
|
43
44
|
if @spread_captures
|
44
45
|
result += split.captures
|
@@ -59,22 +60,14 @@ class StringSplitter
|
|
59
60
|
|
60
61
|
alias lsplit split
|
61
62
|
|
62
|
-
def rsplit(string, delimiter =
|
63
|
-
result, block,
|
64
|
-
|
65
|
-
return result unless iterator
|
66
|
-
|
67
|
-
iterator.each do |split|
|
68
|
-
next if @remove_empty && split.lhs.empty?
|
69
|
-
|
70
|
-
if result.empty?
|
71
|
-
next if @remove_empty && split.rhs.empty?
|
72
|
-
result.unshift(split.rhs)
|
73
|
-
end
|
63
|
+
def rsplit(string, delimiter = @default_separator, at: nil, &block)
|
64
|
+
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
74
65
|
|
75
|
-
|
66
|
+
splits.reverse!.each do |split|
|
67
|
+
split = Split.with(split.merge({ index: (index += 1), count: count }))
|
68
|
+
result.unshift(split.rhs) if result.empty?
|
76
69
|
|
77
|
-
if block.call(
|
70
|
+
if block.call(split)
|
78
71
|
if @include_captures
|
79
72
|
if @spread_captures
|
80
73
|
result = split.captures + result
|
@@ -95,61 +88,45 @@ class StringSplitter
|
|
95
88
|
|
96
89
|
private
|
97
90
|
|
98
|
-
def
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
91
|
+
def splits_for(parts, ncaptures)
|
92
|
+
result = []
|
93
|
+
splits = []
|
94
|
+
|
95
|
+
until parts.empty?
|
96
|
+
lhs = parts.shift
|
97
|
+
separator = parts.shift
|
98
|
+
captures = parts.shift(ncaptures)
|
99
|
+
rhs = parts.length == 1 ? parts.shift : parts.first
|
100
|
+
|
101
|
+
if @remove_empty && (lhs.empty? || rhs.empty?)
|
102
|
+
if lhs.empty? && rhs.empty?
|
103
|
+
# do nothing
|
104
|
+
elsif parts.empty? # last split
|
105
|
+
result << (!lhs.empty? ? lhs : rhs) if splits.empty?
|
106
|
+
elsif !lhs.empty?
|
107
|
+
# replace the empty rhs with the non-empty lhs
|
108
|
+
parts[0] = lhs
|
109
|
+
end
|
117
110
|
|
118
|
-
|
119
|
-
parts = parts.dup
|
120
|
-
|
121
|
-
Enumerator.new do |yielder|
|
122
|
-
until parts.empty?
|
123
|
-
rhs = parts.pop
|
124
|
-
captures = parts.pop(ncaptures)
|
125
|
-
separator = parts.pop
|
126
|
-
lhs = parts.length == 1 ? parts.pop : parts.last
|
127
|
-
|
128
|
-
yielder << Split.with({
|
129
|
-
lhs: lhs,
|
130
|
-
rhs: rhs,
|
131
|
-
separator: separator,
|
132
|
-
captures: captures,
|
133
|
-
})
|
111
|
+
next
|
134
112
|
end
|
113
|
+
|
114
|
+
splits << {
|
115
|
+
lhs: lhs,
|
116
|
+
rhs: rhs,
|
117
|
+
separator: separator,
|
118
|
+
captures: captures,
|
119
|
+
}
|
135
120
|
end
|
121
|
+
|
122
|
+
[result, splits]
|
136
123
|
end
|
137
124
|
|
138
125
|
# setup common to both split methods
|
139
|
-
def split_common(string, delimiter, at, block
|
126
|
+
def split_common(string, delimiter, at, block)
|
140
127
|
unless (match = string.match(delimiter))
|
141
128
|
result = (@remove_empty && string.empty?) ? [] : [string]
|
142
|
-
return [result]
|
143
|
-
end
|
144
|
-
|
145
|
-
unless block
|
146
|
-
if at
|
147
|
-
block = lambda do |index, _split|
|
148
|
-
case index when *at then true else false end
|
149
|
-
end
|
150
|
-
else
|
151
|
-
block = ACCEPT
|
152
|
-
end
|
129
|
+
return [result, block, NO_SPLITS, 0, 0]
|
153
130
|
end
|
154
131
|
|
155
132
|
ncaptures = match.captures.length
|
@@ -178,7 +155,43 @@ class StringSplitter
|
|
178
155
|
end
|
179
156
|
|
180
157
|
parts = string.split(/(#{delimiter})/, -1)
|
181
|
-
|
182
|
-
|
158
|
+
result, splits = splits_for(parts, ncaptures)
|
159
|
+
count = splits.length
|
160
|
+
|
161
|
+
unless block
|
162
|
+
if at
|
163
|
+
at = Array(at).map do |index|
|
164
|
+
if index.is_a?(Integer) && index.negative?
|
165
|
+
# translate 1-based negative indices to 1-based positive
|
166
|
+
# indices e.g.:
|
167
|
+
#
|
168
|
+
# ss.split("foo:bar:baz:quux", ":", at: -1)
|
169
|
+
#
|
170
|
+
# translates to:
|
171
|
+
#
|
172
|
+
# ss.split("foo:bar:baz:quux", ":", at: 3)
|
173
|
+
#
|
174
|
+
# XXX note: we don't use modulo, because we don't want
|
175
|
+
# out-of-bounds indices to silently work e.g. we don't want:
|
176
|
+
#
|
177
|
+
# ss.split("foo:bar:baz:quux", ":", at: -42)
|
178
|
+
#
|
179
|
+
# to mysteriously match when the index is 2
|
180
|
+
|
181
|
+
count + 1 + index
|
182
|
+
else
|
183
|
+
index
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
block = lambda do |split|
|
188
|
+
case split.index when *at then true else false end
|
189
|
+
end
|
190
|
+
else
|
191
|
+
block = ACCEPT
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
[result, block, splits, count, 0]
|
183
196
|
end
|
184
197
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|