string_splitter 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -2
- data/README.md +15 -14
- data/lib/string_splitter.rb +11 -4
- data/lib/string_splitter/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fcb033c8d7804dab11a7756f3fafb06292650c048947d09b01d479e354d2dfa4
|
4
|
+
data.tar.gz: ffc6e4fae751883e2f7731232ccb3b539eb95ff030d716360b284e21339651ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b6ccb81fd4b270251f0f1dc4108e67485a9795a3fd8bdb3ce852a5d7498933e0f431da0d60732fc61e9224c4676ee5104858db0e40ed6589e03eb9fa147fc0dc
|
7
|
+
data.tar.gz: f4d011a2fc127645ca5bbf9fbb607d854e021021b69047a7fbdc6ba9124edbdca750901147a0cce22cd80ca05197355de94c9e3bd353b1a1a1ebfde136eefdce
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,12 @@
|
|
1
|
+
## 0.2.0 - 2018-06-22
|
2
|
+
|
3
|
+
- **breaking change**: make `index` (AKA `offset`) 0-based and add `position` (AKA `pos`)
|
4
|
+
as the 1-based accessor
|
5
|
+
|
1
6
|
## 0.1.0 - 2018-06-22
|
2
7
|
|
3
|
-
- **breaking change**: the block now takes a single `split` object with an
|
4
|
-
|
8
|
+
- **breaking change**: the block now takes a single `split` object with an
|
9
|
+
`index` accessor, rather than separate `index` and `split` arguments
|
5
10
|
- add support for negative indices in the value supplied to the `at` option
|
6
11
|
- add a `count` field to the split object containing the total number of splits
|
7
12
|
|
data/README.md
CHANGED
@@ -51,17 +51,17 @@ ss.split("foo:bar:baz:quux", ":", at: 1)
|
|
51
51
|
ss.split("foo:bar:baz:quux", ":", at: -1)
|
52
52
|
# => ["foo:bar:baz", "quux"]
|
53
53
|
|
54
|
-
# split
|
55
|
-
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -
|
56
|
-
# => ["1", "2", "3", "4:5:6:7
|
54
|
+
# split at multiple separator positions
|
55
|
+
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
56
|
+
# => ["1", "2", "3", "4:5:6:7", "8:9"]
|
57
57
|
|
58
58
|
# split from the right
|
59
59
|
ss.rsplit("1:2:3:4:5:6:7:8:9", ":", at: [1..3, 5])
|
60
60
|
# => ["1:2:3:4", "5:6", "7", "8", "9"]
|
61
61
|
|
62
62
|
# full control via a block
|
63
|
-
result =
|
64
|
-
split.index >
|
63
|
+
result = ss.split('a:a:a:b:c:c:e:a:a:d:c', ":") do |split|
|
64
|
+
split.index > 0 && split.lhs == split.rhs
|
65
65
|
end
|
66
66
|
# => ["a:a", "a:b:c", "c:e:a", "a:d:c"]
|
67
67
|
```
|
@@ -79,8 +79,9 @@ and handle a few common cases e.g.:
|
|
79
79
|
But, because the API is squeezed into two overloaded parameters (the separator and the limit),
|
80
80
|
achieving the desired effects can be tricky. For instance, while `String#split` removes empty
|
81
81
|
trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
|
82
|
-
cramped API means there's no way to
|
83
|
-
to preserve empty fields (negative integer)
|
82
|
+
cramped API means there's no way to e.g. combine a limit (positive integer) with the option
|
83
|
+
to preserve empty fields (negative integer), or use backreferences in a separator pattern
|
84
|
+
without including its captured subexpressions in the result.
|
84
85
|
|
85
86
|
If `split` were being written from scratch, without the baggage of its legacy API,
|
86
87
|
it's possible that some of these options would be made explicit rather than overloading
|
@@ -98,14 +99,14 @@ and delegating the strategy — i.e. which splits should be accepted or rejected
|
|
98
99
|
```ruby
|
99
100
|
ss = StringSplitter.new
|
100
101
|
|
101
|
-
ss.split("foo:bar:baz", ":") { |split| split.index ==
|
102
|
+
ss.split("foo:bar:baz", ":") { |split| split.index == 0 }
|
102
103
|
# => ["foo", "bar:baz"]
|
103
104
|
|
104
|
-
ss.split("foo:bar:baz", ":") { |split| split.
|
105
|
+
ss.split("foo:bar:baz", ":") { |split| split.position == split.count }
|
105
106
|
# => ["foo:bar", "baz"]
|
106
107
|
```
|
107
108
|
|
108
|
-
As a shortcut, the common case of splitting on separators at one or more
|
109
|
+
As a shortcut, the common case of splitting on separators at one or more positions is supported by an option:
|
109
110
|
|
110
111
|
```ruby
|
111
112
|
ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
|
@@ -119,7 +120,7 @@ As an example, the nominally unstructured output of many Unix commands is often,
|
|
119
120
|
that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
|
120
121
|
|
121
122
|
```bash
|
122
|
-
$ ls -
|
123
|
+
$ ls -l
|
123
124
|
|
124
125
|
-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md
|
125
126
|
-rw-r--r-- 1 user users 254 Jun 19 21:21 Gemfile
|
@@ -151,7 +152,7 @@ instead of:
|
|
151
152
|
One way to work around this is to parse the whole line e.g.:
|
152
153
|
|
153
154
|
```ruby
|
154
|
-
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) (.+)$/x)
|
155
|
+
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) \s+ (.+)$/x)
|
155
156
|
```
|
156
157
|
|
157
158
|
But that requires us to specify *everything*. What we really want is a version of `split`
|
@@ -164,7 +165,7 @@ this easy to handle, either via a block:
|
|
164
165
|
|
165
166
|
```ruby
|
166
167
|
ss.split(line) do |split|
|
167
|
-
case split.
|
168
|
+
case split.position when 1..5, 8 then true end
|
168
169
|
end
|
169
170
|
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
170
171
|
```
|
@@ -178,7 +179,7 @@ ss.split(line, at: [1..5, 8])
|
|
178
179
|
|
179
180
|
# VERSION
|
180
181
|
|
181
|
-
0.
|
182
|
+
0.2.0
|
182
183
|
|
183
184
|
# SEE ALSO
|
184
185
|
|
data/lib/string_splitter.rb
CHANGED
@@ -16,7 +16,14 @@ class StringSplitter
|
|
16
16
|
DEFAULT_SEPARATOR = /\s+/
|
17
17
|
NO_SPLITS = []
|
18
18
|
|
19
|
-
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator)
|
19
|
+
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
20
|
+
def position
|
21
|
+
index + 1
|
22
|
+
end
|
23
|
+
|
24
|
+
alias_method :offset, :index
|
25
|
+
alias_method :pos, :position
|
26
|
+
end
|
20
27
|
|
21
28
|
def initialize(
|
22
29
|
default_separator: DEFAULT_SEPARATOR,
|
@@ -126,7 +133,7 @@ class StringSplitter
|
|
126
133
|
def split_common(string, delimiter, at, block)
|
127
134
|
unless (match = string.match(delimiter))
|
128
135
|
result = (@remove_empty && string.empty?) ? [] : [string]
|
129
|
-
return [result, block, NO_SPLITS, 0,
|
136
|
+
return [result, block, NO_SPLITS, 0, -1]
|
130
137
|
end
|
131
138
|
|
132
139
|
ncaptures = match.captures.length
|
@@ -185,13 +192,13 @@ class StringSplitter
|
|
185
192
|
end
|
186
193
|
|
187
194
|
block = lambda do |split|
|
188
|
-
case split.
|
195
|
+
case split.position when *at then true else false end
|
189
196
|
end
|
190
197
|
else
|
191
198
|
block = ACCEPT
|
192
199
|
end
|
193
200
|
end
|
194
201
|
|
195
|
-
[result, block, splits, count,
|
202
|
+
[result, block, splits, count, -1]
|
196
203
|
end
|
197
204
|
end
|