string_splitter 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/README.md +10 -10
- data/lib/string_splitter.rb +7 -7
- data/lib/string_splitter/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 128e1b2cc29cb122f3d5040f7c9e115688532c7e68c59f0a5373291e995642f9
|
4
|
+
data.tar.gz: 6b8729b7fb59aa984c1940ff0f9a1a308dded8b77c7db2b3c2a2ad4cdbd8bd52
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3aa949fb5ac46369af379e2fd28bc18f7c93746515aea76005d606a4cb9f20426dec353bef8406264078cdccf89cdaca99902d961687f12fb72be08d9f2b0072
|
7
|
+
data.tar.gz: acd982d39a003be78b4548992cf108141e89e51a58f918e10515fb01dd1fd562db319e5c0dc5475d3e74739726e1fd752bbee4850b823705fb9520ff6b05e99f
|
data/CHANGELOG.md
CHANGED
@@ -1,7 +1,13 @@
|
|
1
|
+
## 0.3.0 - 2018-06-23
|
2
|
+
|
3
|
+
- **breaking change**: rename the `default_separator` option to `default_delimiter`
|
4
|
+
- to avoid ambiguity in the code, refer to the input pattern/string as the
|
5
|
+
"delimiter" and the matched string as the "separator"
|
6
|
+
|
1
7
|
## 0.2.0 - 2018-06-22
|
2
8
|
|
3
|
-
- **breaking change**: make `index` (AKA `offset`) 0-based and add `position`
|
4
|
-
as the 1-based accessor
|
9
|
+
- **breaking change**: make `index` (AKA `offset`) 0-based and add `position`
|
10
|
+
(AKA `pos`) as the 1-based accessor
|
5
11
|
|
6
12
|
## 0.1.0 - 2018-06-22
|
7
13
|
|
data/README.md
CHANGED
@@ -43,15 +43,15 @@ ss.split("foo bar baz quux", " ")
|
|
43
43
|
ss.split("foo bar baz quux", /\s+/)
|
44
44
|
# => ["foo", "bar", "baz", "quux"]
|
45
45
|
|
46
|
-
# split
|
46
|
+
# split at the first delimiter
|
47
47
|
ss.split("foo:bar:baz:quux", ":", at: 1)
|
48
48
|
# => ["foo", "bar:baz:quux"]
|
49
49
|
|
50
|
-
# split
|
50
|
+
# split at the last delimiter
|
51
51
|
ss.split("foo:bar:baz:quux", ":", at: -1)
|
52
52
|
# => ["foo:bar:baz", "quux"]
|
53
53
|
|
54
|
-
# split at multiple
|
54
|
+
# split at multiple delimiter positions
|
55
55
|
ss.split("1:2:3:4:5:6:7:8:9", ":", at: [1..3, -2])
|
56
56
|
# => ["1", "2", "3", "4:5:6:7", "8:9"]
|
57
57
|
|
@@ -76,11 +76,11 @@ and handle a few common cases e.g.:
|
|
76
76
|
* including the separators in the results
|
77
77
|
* removing (some) empty fields
|
78
78
|
|
79
|
-
But, because the API is squeezed into two overloaded parameters (the
|
79
|
+
But, because the API is squeezed into two overloaded parameters (the delimiter and the limit),
|
80
80
|
achieving the desired effects can be tricky. For instance, while `String#split` removes empty
|
81
81
|
trailing fields (by default), it provides no way to remove *all* empty fields. Likewise, the
|
82
82
|
cramped API means there's no way to e.g. combine a limit (positive integer) with the option
|
83
|
-
to preserve empty fields (negative integer), or use backreferences in a
|
83
|
+
to preserve empty fields (negative integer), or use backreferences in a delimiter pattern
|
84
84
|
without including its captured subexpressions in the result.
|
85
85
|
|
86
86
|
If `split` was being written from scratch, without the baggage of its legacy API,
|
@@ -106,7 +106,7 @@ ss.split("foo:bar:baz", ":") { |split| split.position == split.count }
|
|
106
106
|
# => ["foo:bar", "baz"]
|
107
107
|
```
|
108
108
|
|
109
|
-
As a shortcut, the common case of splitting on
|
109
|
+
As a shortcut, the common case of splitting on delimiters at one or more positions is supported by an option:
|
110
110
|
|
111
111
|
```ruby
|
112
112
|
ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
|
@@ -116,7 +116,7 @@ ss.split('foo:bar:baz:quux', ':', at: [1, -1]) # => ["foo", "bar:baz", "quux"]
|
|
116
116
|
|
117
117
|
I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
|
118
118
|
|
119
|
-
As an example, the nominally unstructured output of many Unix commands is often
|
119
|
+
As an example, the nominally unstructured output of many Unix commands is often formatted in a way
|
120
120
|
that's tantalizingly close to being machine-readable, apart from a few pesky exceptions e.g.:
|
121
121
|
|
122
122
|
```bash
|
@@ -156,7 +156,7 @@ line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+
|
|
156
156
|
```
|
157
157
|
|
158
158
|
But that requires us to specify *everything*. What we really want is a version of `split`
|
159
|
-
which allows us to veto splitting for the 6th and 7th
|
159
|
+
which allows us to veto splitting for the 6th and 7th delimiters i.e. control over which
|
160
160
|
splits are accepted, rather than being restricted to the single, baked-in strategy provided
|
161
161
|
by the `limit` parameter.
|
162
162
|
|
@@ -179,13 +179,13 @@ ss.split(line, at: [1..5, 8])
|
|
179
179
|
|
180
180
|
# VERSION
|
181
181
|
|
182
|
-
0.2.0
|
182
|
+
0.3.0
|
183
183
|
|
184
184
|
# SEE ALSO
|
185
185
|
|
186
186
|
## Gems
|
187
187
|
|
188
|
-
- [rsplit](https://github.com/Tatzyr/rsplit) - a reverse-split implementation (only works with string
|
188
|
+
- [rsplit](https://github.com/Tatzyr/rsplit) - a reverse-split implementation (only works with string delimiters)
|
189
189
|
|
190
190
|
## Articles
|
191
191
|
|
data/lib/string_splitter.rb
CHANGED
@@ -13,7 +13,7 @@ require 'values'
|
|
13
13
|
# guns e.g. regex matching or parsing.
|
14
14
|
class StringSplitter
|
15
15
|
ACCEPT = ->(_split) { true }
|
16
|
-
|
16
|
+
DEFAULT_DELIMITER = /\s+/
|
17
17
|
NO_SPLITS = []
|
18
18
|
|
19
19
|
Split = Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
|
@@ -26,20 +26,20 @@ class StringSplitter
|
|
26
26
|
end
|
27
27
|
|
28
28
|
def initialize(
|
29
|
-
|
29
|
+
default_delimiter: DEFAULT_DELIMITER,
|
30
30
|
include_captures: true,
|
31
31
|
remove_empty: false,
|
32
32
|
spread_captures: true
|
33
33
|
)
|
34
|
-
@default_separator = default_separator
|
34
|
+
@default_delimiter = default_delimiter
|
35
35
|
@include_captures = include_captures
|
36
36
|
@remove_empty = remove_empty
|
37
37
|
@spread_captures = spread_captures
|
38
38
|
end
|
39
39
|
|
40
|
-
attr_reader :default_separator, :include_captures, :remove_empty, :spread_captures
|
40
|
+
attr_reader :default_delimiter, :include_captures, :remove_empty, :spread_captures
|
41
41
|
|
42
|
-
def split(string, delimiter = @default_separator, at: nil, &block)
|
42
|
+
def split(string, delimiter = @default_delimiter, at: nil, &block)
|
43
43
|
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
44
44
|
|
45
45
|
splits.each do |split|
|
@@ -67,7 +67,7 @@ class StringSplitter
|
|
67
67
|
|
68
68
|
alias lsplit split
|
69
69
|
|
70
|
-
def rsplit(string, delimiter = @default_separator, at: nil, &block)
|
70
|
+
def rsplit(string, delimiter = @default_delimiter, at: nil, &block)
|
71
71
|
result, block, splits, count, index = split_common(string, delimiter, at, block)
|
72
72
|
|
73
73
|
splits.reverse!.each do |split|
|
@@ -110,7 +110,7 @@ class StringSplitter
|
|
110
110
|
# do nothing
|
111
111
|
elsif parts.empty? # last split
|
112
112
|
result << (!lhs.empty? ? lhs : rhs) if splits.empty?
|
113
|
-
elsif
|
113
|
+
elsif rhs.empty?
|
114
114
|
# replace the empty rhs with the non-empty lhs
|
115
115
|
parts[0] = lhs
|
116
116
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_splitter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chocolateboy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-22 00:00:00.000000000 Z
|
11
|
+
date: 2018-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: values
|