string_splitter 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.md +192 -0
- data/README.md +187 -0
- data/lib/string_splitter/version.rb +5 -0
- data/lib/string_splitter.rb +184 -0
- metadata +150 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 22890318b3693dc7d3489ff580109bf377a8798b820e8589c9ff52a9880ca4a1
|
|
4
|
+
data.tar.gz: bb9b1894513e3206bd50ccc2908b756d0fac5bfec13d96d76617ba006306b4da
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 51777239fa93949f6fef2690a12d91ece9b024cbffc60d5942073366a159df90341e497dcb19053de11dadc2ac318c7fb8d19f52f92b405445ec994fe847dd23
|
|
7
|
+
data.tar.gz: a65a1db7358c8e0f46a28ee2328e47b8ffa326d810686b35b56a8ce09358932b62337f885e58bb8ac97397c046046d4d02a744ca985383a4db4bd3e97e979737
|
data/CHANGELOG.md
ADDED
data/LICENSE.md
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
The Artistic License 2.0
|
|
2
|
+
========================
|
|
3
|
+
|
|
4
|
+
_Copyright © 2000-2006, The Perl Foundation._
|
|
5
|
+
|
|
6
|
+
Everyone is permitted to copy and distribute verbatim copies
|
|
7
|
+
of this license document, but changing it is not allowed.
|
|
8
|
+
|
|
9
|
+
### Preamble
|
|
10
|
+
|
|
11
|
+
This license establishes the terms under which a given free software
|
|
12
|
+
Package may be copied, modified, distributed, and/or redistributed.
|
|
13
|
+
The intent is that the Copyright Holder maintains some artistic
|
|
14
|
+
control over the development of that Package while still keeping the
|
|
15
|
+
Package available as open source and free software.
|
|
16
|
+
|
|
17
|
+
You are always permitted to make arrangements wholly outside of this
|
|
18
|
+
license directly with the Copyright Holder of a given Package. If the
|
|
19
|
+
terms of this license do not permit the full use that you propose to
|
|
20
|
+
make of the Package, you should contact the Copyright Holder and seek
|
|
21
|
+
a different licensing arrangement.
|
|
22
|
+
|
|
23
|
+
### Definitions
|
|
24
|
+
|
|
25
|
+
“Copyright Holder” means the individual(s) or organization(s)
|
|
26
|
+
named in the copyright notice for the entire Package.
|
|
27
|
+
|
|
28
|
+
“Contributor” means any party that has contributed code or other
|
|
29
|
+
material to the Package, in accordance with the Copyright Holder's
|
|
30
|
+
procedures.
|
|
31
|
+
|
|
32
|
+
“You” and “your” means any person who would like to copy,
|
|
33
|
+
distribute, or modify the Package.
|
|
34
|
+
|
|
35
|
+
“Package” means the collection of files distributed by the
|
|
36
|
+
Copyright Holder, and derivatives of that collection and/or of
|
|
37
|
+
those files. A given Package may consist of either the Standard
|
|
38
|
+
Version, or a Modified Version.
|
|
39
|
+
|
|
40
|
+
“Distribute” means providing a copy of the Package or making it
|
|
41
|
+
accessible to anyone else, or in the case of a company or
|
|
42
|
+
organization, to others outside of your company or organization.
|
|
43
|
+
|
|
44
|
+
“Distributor Fee” means any fee that you charge for Distributing
|
|
45
|
+
this Package or providing support for this Package to another
|
|
46
|
+
party. It does not mean licensing fees.
|
|
47
|
+
|
|
48
|
+
“Standard Version” refers to the Package if it has not been
|
|
49
|
+
modified, or has been modified only in ways explicitly requested
|
|
50
|
+
by the Copyright Holder.
|
|
51
|
+
|
|
52
|
+
“Modified Version” means the Package, if it has been changed, and
|
|
53
|
+
such changes were not explicitly requested by the Copyright
|
|
54
|
+
Holder.
|
|
55
|
+
|
|
56
|
+
“Original License” means this Artistic License as Distributed with
|
|
57
|
+
the Standard Version of the Package, in its current version or as
|
|
58
|
+
it may be modified by The Perl Foundation in the future.
|
|
59
|
+
|
|
60
|
+
“Source” form means the source code, documentation source, and
|
|
61
|
+
configuration files for the Package.
|
|
62
|
+
|
|
63
|
+
“Compiled” form means the compiled bytecode, object code, binary,
|
|
64
|
+
or any other form resulting from mechanical transformation or
|
|
65
|
+
translation of the Source form.
|
|
66
|
+
|
|
67
|
+
### Permission for Use and Modification Without Distribution
|
|
68
|
+
|
|
69
|
+
**(1)** You are permitted to use the Standard Version and create and use
|
|
70
|
+
Modified Versions for any purpose without restriction, provided that
|
|
71
|
+
you do not Distribute the Modified Version.
|
|
72
|
+
|
|
73
|
+
### Permissions for Redistribution of the Standard Version
|
|
74
|
+
|
|
75
|
+
**(2)** You may Distribute verbatim copies of the Source form of the
|
|
76
|
+
Standard Version of this Package in any medium without restriction,
|
|
77
|
+
either gratis or for a Distributor Fee, provided that you duplicate
|
|
78
|
+
all of the original copyright notices and associated disclaimers. At
|
|
79
|
+
your discretion, such verbatim copies may or may not include a
|
|
80
|
+
Compiled form of the Package.
|
|
81
|
+
|
|
82
|
+
**(3)** You may apply any bug fixes, portability changes, and other
|
|
83
|
+
modifications made available from the Copyright Holder. The resulting
|
|
84
|
+
Package will still be considered the Standard Version, and as such
|
|
85
|
+
will be subject to the Original License.
|
|
86
|
+
|
|
87
|
+
### Distribution of Modified Versions of the Package as Source
|
|
88
|
+
|
|
89
|
+
**(4)** You may Distribute your Modified Version as Source (either gratis
|
|
90
|
+
or for a Distributor Fee, and with or without a Compiled form of the
|
|
91
|
+
Modified Version) provided that you clearly document how it differs
|
|
92
|
+
from the Standard Version, including, but not limited to, documenting
|
|
93
|
+
any non-standard features, executables, or modules, and provided that
|
|
94
|
+
you do at least ONE of the following:
|
|
95
|
+
|
|
96
|
+
* **(a)** make the Modified Version available to the Copyright Holder
|
|
97
|
+
of the Standard Version, under the Original License, so that the
|
|
98
|
+
Copyright Holder may include your modifications in the Standard
|
|
99
|
+
Version.
|
|
100
|
+
* **(b)** ensure that installation of your Modified Version does not
|
|
101
|
+
prevent the user installing or running the Standard Version. In
|
|
102
|
+
addition, the Modified Version must bear a name that is different
|
|
103
|
+
from the name of the Standard Version.
|
|
104
|
+
* **(c)** allow anyone who receives a copy of the Modified Version to
|
|
105
|
+
make the Source form of the Modified Version available to others
|
|
106
|
+
under
|
|
107
|
+
* **(i)** the Original License or
|
|
108
|
+
* **(ii)** a license that permits the licensee to freely copy,
|
|
109
|
+
modify and redistribute the Modified Version using the same
|
|
110
|
+
licensing terms that apply to the copy that the licensee
|
|
111
|
+
received, and requires that the Source form of the Modified
|
|
112
|
+
Version, and of any works derived from it, be made freely
|
|
113
|
+
available in that license fees are prohibited but Distributor
|
|
114
|
+
Fees are allowed.
|
|
115
|
+
|
|
116
|
+
### Distribution of Compiled Forms of the Standard Version
|
|
117
|
+
### or Modified Versions without the Source
|
|
118
|
+
|
|
119
|
+
**(5)** You may Distribute Compiled forms of the Standard Version without
|
|
120
|
+
the Source, provided that you include complete instructions on how to
|
|
121
|
+
get the Source of the Standard Version. Such instructions must be
|
|
122
|
+
valid at the time of your distribution. If these instructions, at any
|
|
123
|
+
time while you are carrying out such distribution, become invalid, you
|
|
124
|
+
must provide new instructions on demand or cease further distribution.
|
|
125
|
+
If you provide valid instructions or cease distribution within thirty
|
|
126
|
+
days after you become aware that the instructions are invalid, then
|
|
127
|
+
you do not forfeit any of your rights under this license.
|
|
128
|
+
|
|
129
|
+
**(6)** You may Distribute a Modified Version in Compiled form without
|
|
130
|
+
the Source, provided that you comply with Section 4 with respect to
|
|
131
|
+
the Source of the Modified Version.
|
|
132
|
+
|
|
133
|
+
### Aggregating or Linking the Package
|
|
134
|
+
|
|
135
|
+
**(7)** You may aggregate the Package (either the Standard Version or
|
|
136
|
+
Modified Version) with other packages and Distribute the resulting
|
|
137
|
+
aggregation provided that you do not charge a licensing fee for the
|
|
138
|
+
Package. Distributor Fees are permitted, and licensing fees for other
|
|
139
|
+
components in the aggregation are permitted. The terms of this license
|
|
140
|
+
apply to the use and Distribution of the Standard or Modified Versions
|
|
141
|
+
as included in the aggregation.
|
|
142
|
+
|
|
143
|
+
**(8)** You are permitted to link Modified and Standard Versions with
|
|
144
|
+
other works, to embed the Package in a larger work of your own, or to
|
|
145
|
+
build stand-alone binary or bytecode versions of applications that
|
|
146
|
+
include the Package, and Distribute the result without restriction,
|
|
147
|
+
provided the result does not expose a direct interface to the Package.
|
|
148
|
+
|
|
149
|
+
### Items That are Not Considered Part of a Modified Version
|
|
150
|
+
|
|
151
|
+
**(9)** Works (including, but not limited to, modules and scripts) that
|
|
152
|
+
merely extend or make use of the Package, do not, by themselves, cause
|
|
153
|
+
the Package to be a Modified Version. In addition, such works are not
|
|
154
|
+
considered parts of the Package itself, and are not subject to the
|
|
155
|
+
terms of this license.
|
|
156
|
+
|
|
157
|
+
### General Provisions
|
|
158
|
+
|
|
159
|
+
**(10)** Any use, modification, and distribution of the Standard or
|
|
160
|
+
Modified Versions is governed by this Artistic License. By using,
|
|
161
|
+
modifying or distributing the Package, you accept this license. Do not
|
|
162
|
+
use, modify, or distribute the Package, if you do not accept this
|
|
163
|
+
license.
|
|
164
|
+
|
|
165
|
+
**(11)** If your Modified Version has been derived from a Modified
|
|
166
|
+
Version made by someone other than you, you are nevertheless required
|
|
167
|
+
to ensure that your Modified Version complies with the requirements of
|
|
168
|
+
this license.
|
|
169
|
+
|
|
170
|
+
**(12)** This license does not grant you the right to use any trademark,
|
|
171
|
+
service mark, tradename, or logo of the Copyright Holder.
|
|
172
|
+
|
|
173
|
+
**(13)** This license includes the non-exclusive, worldwide,
|
|
174
|
+
free-of-charge patent license to make, have made, use, offer to sell,
|
|
175
|
+
sell, import and otherwise transfer the Package with respect to any
|
|
176
|
+
patent claims licensable by the Copyright Holder that are necessarily
|
|
177
|
+
infringed by the Package. If you institute patent litigation
|
|
178
|
+
(including a cross-claim or counterclaim) against any party alleging
|
|
179
|
+
that the Package constitutes direct or contributory patent
|
|
180
|
+
infringement, then this Artistic License to you shall terminate on the
|
|
181
|
+
date that such litigation is filed.
|
|
182
|
+
|
|
183
|
+
**(14)** **Disclaimer of Warranty:**
|
|
184
|
+
|
|
185
|
+
THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS
|
|
186
|
+
IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED
|
|
187
|
+
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR
|
|
188
|
+
NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL
|
|
189
|
+
LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL
|
|
190
|
+
BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL
|
|
191
|
+
DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF
|
|
192
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
# StringSplitter
|
|
2
|
+
|
|
3
|
+
[Build Status](https://travis-ci.org/chocolateboy/string_splitter)
|
|
4
|
+
[Gem Version](https://rubygems.org/gems/string_splitter)
|
|
5
|
+
|
|
6
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
7
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
8
|
+
|
|
9
|
+
- [NAME](#name)
|
|
10
|
+
- [INSTALLATION](#installation)
|
|
11
|
+
- [SYNOPSIS](#synopsis)
|
|
12
|
+
- [DESCRIPTION](#description)
|
|
13
|
+
- [WHY?](#why)
|
|
14
|
+
- [VERSION](#version)
|
|
15
|
+
- [SEE ALSO](#see-also)
|
|
16
|
+
- [Gems](#gems)
|
|
17
|
+
- [Articles](#articles)
|
|
18
|
+
- [AUTHOR](#author)
|
|
19
|
+
- [COPYRIGHT AND LICENSE](#copyright-and-license)
|
|
20
|
+
|
|
21
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
22
|
+
|
|
23
|
+
# NAME
|
|
24
|
+
|
|
25
|
+
StringSplitter - `String#split` on steroids
|
|
26
|
+
|
|
27
|
+
# INSTALLATION
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
gem "string_splitter"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
# SYNOPSIS
|
|
34
|
+
|
|
35
|
+
```ruby
|
|
36
|
+
require "string_splitter"
|
|
37
|
+
|
|
38
|
+
ss = StringSplitter.new
|
|
39
|
+
|
|
40
|
+
# same as String#split
|
|
41
|
+
ss.split("foo bar baz quux")
|
|
42
|
+
# => ["foo", "bar", "baz", "quux"]
|
|
43
|
+
|
|
44
|
+
# split on the first separator
|
|
45
|
+
ss.split("foo:bar:baz:quux", ":", at: 1)
|
|
46
|
+
# => ["foo", "bar:baz:quux"]
|
|
47
|
+
|
|
48
|
+
# split on the last separator
|
|
49
|
+
ss.rsplit("foo:bar:baz:quux", ":", at: 1)
|
|
50
|
+
# => ["foo:bar:baz", "quux"]
|
|
51
|
+
|
|
52
|
+
# split on multiple indices
|
|
53
|
+
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
|
54
|
+
ss.split(line, at: [1..5, 8])
|
|
55
|
+
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
|
56
|
+
|
|
57
|
+
# fine-grained control via a block
|
|
58
|
+
ss.split("foo:bar:baz-baz", /[:-]/) do |i, split|
|
|
59
|
+
split.rhs == "baz" && split.separator == "-"
|
|
60
|
+
end
|
|
61
|
+
# => ["foo:bar:baz", "baz"]
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
# DESCRIPTION
|
|
66
|
+
|
|
67
|
+
Many languages have built-in string `split` functions/methods. They behave similarly
|
|
68
|
+
(notwithstanding the occasional [surprise](https://chriszetter.com/blog/2017/10/29/splitting-strings/)),
|
|
69
|
+
and handle a few common cases e.g.:
|
|
70
|
+
|
|
71
|
+
* limiting the number of splits
|
|
72
|
+
* including the separators in the results
|
|
73
|
+
* removing (some) empty tokens
|
|
74
|
+
|
|
75
|
+
But, because the API is squeezed into two overloaded parameters (the separator and the limit),
|
|
76
|
+
achieving the desired effects can be tricky. For instance, while `String#split` removes empty
|
|
77
|
+
trailing tokens (by default), it provides no way to remove *all* empty tokens. Likewise, the
|
|
78
|
+
cramped API means there's no way to combine e.g. a limit (positive integer) with the option
|
|
79
|
+
to preserve empty tokens (negative integer).
|
|
80
|
+
|
|
81
|
+
If `split` was being written from scratch, without the baggage of its legacy API,
|
|
82
|
+
it's possible that some of these options would be made explicit rather than overloading
|
|
83
|
+
the `limit` parameter. And, indeed, this is possible in some implementations, e.g. in Crystal:
|
|
84
|
+
|
|
85
|
+
```ruby
|
|
86
|
+
":foo:bar:baz:".split(":", remove_empty: false) # => ["", "foo", "bar", "baz", ""]
|
|
87
|
+
":foo:bar:baz:".split(":", remove_empty: true) # => ["foo", "bar", "baz"]
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
StringSplitter takes this one step further by moving the configuration out of the method altogether
|
|
91
|
+
and delegating the strategy — i.e. which splits should be accepted or rejected — to a block:
|
|
92
|
+
|
|
93
|
+
```ruby
|
|
94
|
+
ss = StringSplitter.new
|
|
95
|
+
|
|
96
|
+
ss.split("foo:bar:baz", ":") { |i| i == 1 } # => ["foo", "bar:baz"]
|
|
97
|
+
ss.rsplit("foo:bar:baz", ":") { |i| i == 1 } # => ["foo:bar", "baz"]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
As a shortcut, the common case of splitting at one or more indices can be specified via an option:
|
|
101
|
+
|
|
102
|
+
```ruby
|
|
103
|
+
ss.split('foo:bar:baz:quux', ':', at: [1, 3]) # => ["foo", "bar:baz", "quux"]
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
# WHY?
|
|
107
|
+
|
|
108
|
+
I wanted to split semi-structured output into fields without having to resort to a regex or a full-blown parser.
|
|
109
|
+
|
|
110
|
+
As an example, the nominally unstructured/human-friendly output of many Unix commands is, in practice,
|
|
111
|
+
*almost* structured. It's often tantalizingly close to being space-separated, apart from a few pesky
|
|
112
|
+
exceptions e.g.:
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
$ ls -la
|
|
116
|
+
|
|
117
|
+
-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md
|
|
118
|
+
-rw-r--r-- 1 user users 254 Jun 19 21:21 Gemfile
|
|
119
|
+
drwxr-xr-x 3 user users 4096 Jun 19 22:56 lib
|
|
120
|
+
-rw-r--r-- 1 user users 8952 Jun 18 18:16 LICENSE.md
|
|
121
|
+
-rw-r--r-- 1 user users 3134 Jun 19 22:59 README.md
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
These lines can *almost* be parsed into an array of fields by splitting them on whitespace. The exception is the
|
|
125
|
+
date (columns 6-8) i.e.:
|
|
126
|
+
|
|
127
|
+
```ruby
|
|
128
|
+
line = "-rw-r--r-- 1 user users 87 Jun 18 18:16 CHANGELOG.md"
|
|
129
|
+
line.split
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
gives:
|
|
133
|
+
|
|
134
|
+
```ruby
|
|
135
|
+
["-rw-r--r--", "1", "user", "users", "87", "Jun", "18", "18:16", "CHANGELOG.md"]
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
instead of:
|
|
139
|
+
|
|
140
|
+
```ruby
|
|
141
|
+
["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
One way to work around this is to parse the whole line e.g.:
|
|
145
|
+
|
|
146
|
+
```ruby
|
|
147
|
+
line.match(/^(\S+) \s+ (\d+) \s+ (\S+) \s+ (\S+) \s+ (\d+) \s+ (\S+ \s+ \d+ \s+ \S+) (.+)$/x)
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
But that requires us to specify *everything*. What we really want is a version of `split`
|
|
151
|
+
that we can disable for the 6th and 7th separators (the ones inside the date) i.e. manual control over which splits
|
|
152
|
+
are accepted, rather than being restricted to the single, baked-in strategy supported by
|
|
153
|
+
the `limit` parameter.
|
|
154
|
+
|
|
155
|
+
StringSplitter makes it easy to create your own splitting strategies to both emulate and
|
|
156
|
+
enhance existing behaviors and create new ones e.g., in this case:
|
|
157
|
+
|
|
158
|
+
```ruby
|
|
159
|
+
ss.split(line, at: [1..5, 8])
|
|
160
|
+
|
|
161
|
+
# => ["-rw-r--r--", "1", "user", "users", "87", "Jun 18 18:16", "CHANGELOG.md"]
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
# VERSION
|
|
165
|
+
|
|
166
|
+
0.0.1
|
|
167
|
+
|
|
168
|
+
# SEE ALSO
|
|
169
|
+
|
|
170
|
+
## Gems
|
|
171
|
+
|
|
172
|
+
- [rsplit](https://github.com/Tatzyr/rsplit) - a reverse-split implementation (only works with string separators)
|
|
173
|
+
|
|
174
|
+
## Articles
|
|
175
|
+
|
|
176
|
+
- [Splitting Strings](https://chriszetter.com/blog/2017/10/29/splitting-strings/)
|
|
177
|
+
|
|
178
|
+
# AUTHOR
|
|
179
|
+
|
|
180
|
+
[chocolateboy](mailto:chocolate@cpan.org)
|
|
181
|
+
|
|
182
|
+
# COPYRIGHT AND LICENSE
|
|
183
|
+
|
|
184
|
+
Copyright © 2018 by chocolateboy.
|
|
185
|
+
|
|
186
|
+
This is free software; you can redistribute it and/or modify it under the
|
|
187
|
+
terms of the [Artistic License 2.0](http://www.opensource.org/licenses/artistic-license-2.0.php).
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'values'
|
|
4
|
+
|
|
5
|
+
# This class extends the functionality of +String#split+ by:
|
|
6
|
+
#
|
|
7
|
+
# - providing full control over which splits are accepted or rejected
|
|
8
|
+
# - adding support for splitting from right-to-left
|
|
9
|
+
# - encapsulating splitting options/preferences in instances rather than trying to
|
|
10
|
+
# cram them into overloaded method parameters
|
|
11
|
+
#
|
|
12
|
+
# These enhancements allow splits to handle many cases that otherwise require bigger
|
|
13
|
+
# guns e.g. regex matching or parsing.
|
|
14
|
+
class StringSplitter
  # Default strategy used when neither a block nor +at+ is supplied:
  # every candidate split is accepted.
  ACCEPT = ->(_index, _split) { true }

  # Immutable description of one candidate split: the fields on either side
  # of the separator (+lhs+/+rhs+), the matched +separator+ text and the
  # delimiter's own capture groups (+captures+).
  Split = Value.new(:captures, :lhs, :rhs, :separator)

  # TODO: add default_separator
  #
  # @param include_captures [Boolean] when true, the delimiter's capture
  #   groups are inserted into the result next to each accepted split
  # @param remove_empty [Boolean] when true, splits with an empty far-side
  #   field are skipped, an empty first field is dropped, and an empty string
  #   that the delimiter does not match yields +[]+
  # @param spread_captures [Boolean] when true, captures are added to the
  #   result as individual elements; otherwise as one nested array per split
  def initialize(include_captures: true, remove_empty: false, spread_captures: true)
    @include_captures = include_captures
    @remove_empty = remove_empty
    @spread_captures = spread_captures
  end

  # Splits +string+ from left to right.
  #
  # Each candidate split is passed to the strategy block together with its
  # 1-based index; a truthy return accepts the split, a falsy one glues the
  # separator and right-hand field back onto the previous result element.
  #
  # @param string [String] the text to split
  # @param delimiter [Regexp, String] separator; a String is matched literally
  # @param at [Integer, Range, Array, nil] indices of the splits to accept;
  #   ignored when a block is given
  # @yieldparam index [Integer] 1-based position of the split, counted from the left
  # @yieldparam split [Split] the candidate split
  # @return [Array] the resulting fields (and captures, if enabled)
  def split(string, delimiter = /\s+/, at: nil, &block)
    result, block, iterator, index = split_common(string, delimiter, at, block, :forward)

    # no iterator means the delimiter did not match: result is already final
    return result unless iterator

    iterator.each do |split|
      # NOTE(review): a skipped split is dropped entirely (its lhs is not
      # seeded into the result and the index is not advanced), so with
      # remove_empty "a:" appears to yield [] rather than ["a"] - confirm
      # whether that is intended.
      next if @remove_empty && split.rhs.empty?

      if result.empty?
        next if @remove_empty && split.lhs.empty?
        result << split.lhs
      end

      index += 1

      if block.call(index, split)
        if @include_captures
          if @spread_captures
            result.concat(split.captures)
          else
            result << split.captures
          end
        end

        result << split.rhs
      else
        # rejected: append the separator and the rhs to the previous element
        result[-1] = result[-1] + split.separator + split.rhs
      end
    end

    result
  end

  alias lsplit split

  # Splits +string+ from right to left. Mirrors #split: indices passed to the
  # strategy are 1-based and counted from the right, and a rejected split
  # glues the left-hand field and separator onto the front of the first
  # result element.
  #
  # @param string [String] the text to split
  # @param delimiter [Regexp, String] separator; a String is matched literally
  # @param at [Integer, Range, Array, nil] indices of the splits to accept;
  #   ignored when a block is given
  # @yieldparam index [Integer] 1-based position of the split, counted from the right
  # @yieldparam split [Split] the candidate split
  # @return [Array] the resulting fields (and captures, if enabled)
  def rsplit(string, delimiter = /\s+/, at: nil, &block)
    result, block, iterator, index = split_common(string, delimiter, at, block, :reverse)

    # no iterator means the delimiter did not match: result is already final
    return result unless iterator

    iterator.each do |split|
      next if @remove_empty && split.lhs.empty?

      if result.empty?
        next if @remove_empty && split.rhs.empty?
        result.unshift(split.rhs)
      end

      index += 1

      if block.call(index, split)
        if @include_captures
          if @spread_captures
            result = split.captures + result
          else
            result.unshift(split.captures)
          end
        end

        result.unshift(split.lhs)
      else
        # rejected: prepend the lhs and the separator to the first element
        result[0] = split.lhs + split.separator + result[0]
      end
    end

    result
  end

  private

  # Walks +parts+ (laid out as field, separator, captures..., field, ...)
  # from the front and yields one Split per separator. The rhs of each split
  # is the next field only, and is left in place to become the following
  # split's lhs unless it is the final element.
  def forward_iterator(parts, ncaptures)
    parts = parts.dup # consumed destructively below

    Enumerator.new do |yielder|
      until parts.empty?
        lhs = parts.shift
        separator = parts.shift
        captures = parts.shift(ncaptures)
        rhs = parts.length == 1 ? parts.shift : parts.first

        yielder << Split.with({
          lhs: lhs,
          rhs: rhs,
          separator: separator,
          captures: captures,
        })
      end
    end
  end

  # Same as #forward_iterator, but consumes +parts+ from the back so the
  # splits are yielded right to left.
  def reverse_iterator(parts, ncaptures)
    parts = parts.dup # consumed destructively below

    Enumerator.new do |yielder|
      until parts.empty?
        rhs = parts.pop
        captures = parts.pop(ncaptures)
        separator = parts.pop
        lhs = parts.length == 1 ? parts.pop : parts.last

        yielder << Split.with({
          lhs: lhs,
          rhs: rhs,
          separator: separator,
          captures: captures,
        })
      end
    end
  end

  # setup common to both split methods
  #
  # Returns +[result]+ when the delimiter does not match +string+, otherwise
  # +[result, block, iterator, index]+ where +result+ is an empty array,
  # +block+ is the accept/reject strategy, +iterator+ enumerates the
  # candidate splits in the direction given by +type+ (+:forward+ or
  # +:reverse+) and +index+ is the initial split counter (0).
  def split_common(string, delimiter, at, block, type)
    # A String delimiter is literal text (as with String#split), so escape it
    # rather than letting characters such as "." or "|" act as regex syntax.
    pattern = delimiter.is_a?(Regexp) ? delimiter : Regexp.new(Regexp.escape(delimiter))

    unless (match = string.match(pattern))
      result = @remove_empty && string.empty? ? [] : [string]
      return [result]
    end

    # an explicit block wins over +at+; with neither, accept every split
    block ||=
      if at
        lambda do |index, _split|
          case index
          when *at then true
          else false
          end
        end
      else
        ACCEPT
      end

    ncaptures = match.captures.length
    source = pattern.to_s

    if ncaptures > 0
      # increment back-references so they remain valid when the outer capture
      # is added e.g. to split on:
      #
      #   - <foo-comment> ... </foo-comment>
      #   - <bar-comment> ... </bar-comment>
      #
      # etc.
      #
      # before:
      #
      #   %r| <(\w+-comment)> [^<]* </\1> |x
      #
      # after:
      #
      #   %r| ( <(\w+-comment)> [^<]* </\2> ) |x
      #
      # Other escape sequences (including an escaped backslash) are consumed
      # as a pair and copied through unchanged.
      source = source.gsub(/\\(?:(\d+)|.)/) do
        escape = Regexp.last_match
        escape[1] ? '\\' + escape[1].to_i.next.to_s : escape[0]
      end
    end

    # Wrapping the delimiter in a group makes String#split return the
    # separators (and the delimiter's own captures) between the fields;
    # the -1 limit keeps trailing empty fields.
    #
    # NOTE(review): the iterators assume exactly +ncaptures+ capture elements
    # follow every separator; String#split appears to omit groups that did
    # not participate in a match, which would misalign the parts - confirm.
    parts = string.split(/(#{source})/, -1)
    iterator = send("#{type}_iterator", parts, ncaptures)
    [[], block, iterator, 0]
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: string_splitter
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- chocolateboy
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-06-20 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: values
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.8'
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.8'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: bundler
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '1.16'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '1.16'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: minitest
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '5.11'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '5.11'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: minitest-power_assert
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: 0.3.0
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 0.3.0
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: minitest-reporters
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '1.3'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '1.3'
|
|
83
|
+
- !ruby/object:Gem::Dependency
|
|
84
|
+
name: rake
|
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - "~>"
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '10.0'
|
|
90
|
+
type: :development
|
|
91
|
+
prerelease: false
|
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
93
|
+
requirements:
|
|
94
|
+
- - "~>"
|
|
95
|
+
- !ruby/object:Gem::Version
|
|
96
|
+
version: '10.0'
|
|
97
|
+
- !ruby/object:Gem::Dependency
|
|
98
|
+
name: rubocop
|
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
|
100
|
+
requirements:
|
|
101
|
+
- - "~>"
|
|
102
|
+
- !ruby/object:Gem::Version
|
|
103
|
+
version: 0.54.0
|
|
104
|
+
type: :development
|
|
105
|
+
prerelease: false
|
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
+
requirements:
|
|
108
|
+
- - "~>"
|
|
109
|
+
- !ruby/object:Gem::Version
|
|
110
|
+
version: 0.54.0
|
|
111
|
+
description:
|
|
112
|
+
email: chocolate@cpan.org
|
|
113
|
+
executables: []
|
|
114
|
+
extensions: []
|
|
115
|
+
extra_rdoc_files: []
|
|
116
|
+
files:
|
|
117
|
+
- CHANGELOG.md
|
|
118
|
+
- LICENSE.md
|
|
119
|
+
- README.md
|
|
120
|
+
- lib/string_splitter.rb
|
|
121
|
+
- lib/string_splitter/version.rb
|
|
122
|
+
homepage: https://github.com/chocolateboy/string_splitter
|
|
123
|
+
licenses:
|
|
124
|
+
- Artistic-2.0
|
|
125
|
+
metadata:
|
|
126
|
+
allowed_push_host: https://rubygems.org
|
|
127
|
+
bug_tracker_uri: https://github.com/chocolateboy/string_splitter/issues
|
|
128
|
+
changelog_uri: https://github.com/chocolateboy/string_splitter/blob/master/CHANGELOG.md
|
|
129
|
+
source_code_uri: https://github.com/chocolateboy/string_splitter
|
|
130
|
+
post_install_message:
|
|
131
|
+
rdoc_options: []
|
|
132
|
+
require_paths:
|
|
133
|
+
- lib
|
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
|
+
requirements:
|
|
136
|
+
- - ">="
|
|
137
|
+
- !ruby/object:Gem::Version
|
|
138
|
+
version: '0'
|
|
139
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
|
+
requirements:
|
|
141
|
+
- - ">="
|
|
142
|
+
- !ruby/object:Gem::Version
|
|
143
|
+
version: '0'
|
|
144
|
+
requirements: []
|
|
145
|
+
rubyforge_project:
|
|
146
|
+
rubygems_version: 2.7.7
|
|
147
|
+
signing_key:
|
|
148
|
+
specification_version: 4
|
|
149
|
+
summary: String#split on steroids
|
|
150
|
+
test_files: []
|