character_set 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +14 -0
- data/README.md +24 -27
- data/character_set.gemspec +1 -1
- data/lib/character_set.rb +2 -2
- data/lib/character_set/core_ext/string_ext.rb +17 -22
- data/lib/character_set/{common_sets.rb → predefined_sets.rb} +9 -1
- data/lib/character_set/pure.rb +1 -1
- data/lib/character_set/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e44c152c023f9b427cf8cf28f78bd77b611ce0dea7d480683dc56a834d022ce7
|
4
|
+
data.tar.gz: d0c2416e610779e44f3d2f1e961c03f2315da6f141b200ddfc48ed34742d1c8b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13aec8dd0c27b280382b75eec1f18998330fe92711c185af93d34d8414ef5585d5f231148371d7960c6b0646dc7a675afdc500f4c5bfab7aa88a88ec07741a1f
|
7
|
+
data.tar.gz: 39eaf06a7fe1c87ee4112693b157f8cea2b2130fa32c82259c3f1625b22807e3e3f3e1fe994055330fbf3a639bc64306b98c8031522e88d548217d08dabebb3b
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Changelog
|
2
|
+
All notable changes to this project will be documented in this file.
|
3
|
+
|
4
|
+
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
|
+
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
|
+
|
7
|
+
## [1.1.0] - 2018-09-21
|
8
|
+
|
9
|
+
### Added
|
10
|
+
- added option to reference a predefined set via Symbol in `String` extension methods
|
11
|
+
- added predefined sets `::ascii_alnum` and `::ascii_letters`
|
12
|
+
|
13
|
+
## [1.0.0] - 2018-09-02
|
14
|
+
Initial release.
|
data/README.md
CHANGED
@@ -13,6 +13,16 @@ Many parts can be used independently, e.g.:
|
|
13
13
|
|
14
14
|
## Usage
|
15
15
|
|
16
|
+
### Usage examples
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
CharacterSet.url_query.cover?('?a=(b$c;)') # => true
|
20
|
+
|
21
|
+
CharacterSet.non_ascii.delete_in!(string)
|
22
|
+
|
23
|
+
CharacterSet.emoji.sample(5) # => ["⛷", "👈", "🌞", "♑", "⛈"]
|
24
|
+
```
|
25
|
+
|
16
26
|
### Parse/Initialize
|
17
27
|
|
18
28
|
These all produce a `CharacterSet` containing `a`, `b` and `c`:
|
@@ -27,41 +37,25 @@ CharacterSet.parse('[a-c]')
|
|
27
37
|
CharacterSet.parse('\U00000061-\U00000063')
|
28
38
|
```
|
29
39
|
|
30
|
-
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/janosch-x/regexp_property_values) are installed, `::of_regexp` and `::of_property` can also be used. `::of_regexp` can handle intersections, negations, and set nesting
|
40
|
+
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/janosch-x/regexp_property_values) are installed, `::of_regexp` and `::of_property` can also be used. `::of_regexp` can handle intersections, negations, and set nesting.
|
31
41
|
|
32
42
|
```ruby
|
33
|
-
#
|
34
|
-
set = CharacterSet.of_regexp(/[\D&&[:ascii:]&&\p{emoji}]/)
|
35
|
-
|
36
|
-
# ... of course there are!
|
37
|
-
set.to_a(stringify: true) # => ["#", "*"]
|
43
|
+
CharacterSet.of_property('Thai') # => #<CharacterSet (size: 86)>
|
38
44
|
|
39
|
-
# with the core extension:
|
40
45
|
require 'character_set/core_ext/regexp_ext'
|
41
|
-
|
46
|
+
|
47
|
+
/[\D&&[:ascii:]&&\p{emoji}]/.character_set.size # => 2
|
42
48
|
```
|
43
49
|
|
44
|
-
###
|
50
|
+
### Predefined utility sets
|
51
|
+
|
52
|
+
`ascii`, `ascii_alnum`, `ascii_letters`, `bmp`, `crypt`, `emoji`, `newline`, `unicode`, `url_fragment`, `url_host`, `url_path`, `url_query`, `whitespace`
|
45
53
|
|
46
54
|
```ruby
|
47
|
-
CharacterSet.ascii
|
48
|
-
CharacterSet.bmp
|
49
|
-
CharacterSet.crypt
|
50
|
-
CharacterSet.emoji
|
51
|
-
CharacterSet.newline
|
52
|
-
CharacterSet.unicode
|
53
|
-
CharacterSet.url_fragment
|
54
|
-
CharacterSet.url_host
|
55
|
-
CharacterSet.url_path
|
56
|
-
CharacterSet.url_query
|
57
|
-
CharacterSet.whitespace
|
58
|
-
|
59
|
-
# e.g.
|
60
|
-
CharacterSet.url_query.cover?('?a=(b$c;)') # => true
|
61
|
-
CharacterSet.emoji.sample(5) # => ["⛷", "👈", "🌞", "♑", "⛈"]
|
55
|
+
CharacterSet.ascii # => #<CharacterSet (size: 128)>
|
62
56
|
|
63
57
|
# all can be prefixed with `non_`, e.g.
|
64
|
-
CharacterSet.non_ascii
|
58
|
+
CharacterSet.non_ascii
|
65
59
|
```
|
66
60
|
|
67
61
|
### Interact with Strings
|
@@ -95,9 +89,12 @@ There is also a core extension for String interaction.
|
|
95
89
|
require 'character_set/core_ext/string_ext'
|
96
90
|
|
97
91
|
"a\rb".character_set & CharacterSet.newline # => CharacterSet["\r"]
|
98
|
-
"a\rb".uses_character_set?(CharacterSet
|
92
|
+
"a\rb".uses_character_set?(CharacterSet['ä', 'ö', 'ü']) # => false
|
99
93
|
"a\rb".covered_by_character_set?(CharacterSet.newline) # => false
|
100
|
-
|
94
|
+
|
95
|
+
# predefined sets can also be referenced via Symbols
|
96
|
+
"a\rb".covered_by_character_set?(:ascii) # => true
|
97
|
+
"a\rb".delete_character_set(:newline) # => 'ab'
|
101
98
|
# etc.
|
102
99
|
```
|
103
100
|
|
data/character_set.gemspec
CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |s|
|
|
28
28
|
s.add_development_dependency 'bundler', '~> 1.16'
|
29
29
|
s.add_development_dependency 'rake', '~> 10.0'
|
30
30
|
s.add_development_dependency 'rake-compiler', '~> 1.0'
|
31
|
-
s.add_development_dependency 'regexp_parser', '~> 1.
|
31
|
+
s.add_development_dependency 'regexp_parser', '~> 1.1'
|
32
32
|
s.add_development_dependency 'regexp_property_values', '~> 0.3.2'
|
33
33
|
s.add_development_dependency 'rspec', '~> 3.0'
|
34
34
|
end
|
data/lib/character_set.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'character_set/character'
|
2
|
-
require 'character_set/common_sets'
|
3
2
|
require 'character_set/expression_converter'
|
4
3
|
require 'character_set/parser'
|
4
|
+
require 'character_set/predefined_sets'
|
5
5
|
require 'character_set/set_method_adapters'
|
6
6
|
require 'character_set/shared_methods'
|
7
7
|
require 'character_set/version'
|
@@ -17,5 +17,5 @@ class CharacterSet
|
|
17
17
|
prepend SetMethodAdapters
|
18
18
|
include Enumerable
|
19
19
|
include SharedMethods
|
20
|
-
extend CommonSets
|
20
|
+
extend PredefinedSets
|
21
21
|
end
|
data/lib/character_set/core_ext/string_ext.rb
CHANGED
@@ -5,28 +5,23 @@ class CharacterSet
|
|
5
5
|
CharacterSet.of(self)
|
6
6
|
end
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
set.keep_in(self)
|
26
|
-
end
|
27
|
-
|
28
|
-
def keep_character_set!(set)
|
29
|
-
set.keep_in!(self)
|
8
|
+
{
|
9
|
+
covered_by_character_set?: :cover?,
|
10
|
+
delete_character_set: :delete_in,
|
11
|
+
delete_character_set!: :delete_in!,
|
12
|
+
keep_character_set: :keep_in,
|
13
|
+
keep_character_set!: :keep_in!,
|
14
|
+
uses_character_set?: :used_by?,
|
15
|
+
}.each do |string_method, set_method|
|
16
|
+
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
17
|
+
def #{string_method}(arg)
|
18
|
+
if arg.instance_of?(Symbol)
|
19
|
+
CharacterSet.__send__(arg).#{set_method}(self)
|
20
|
+
else
|
21
|
+
arg.#{set_method}(self)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
RUBY
|
30
25
|
end
|
31
26
|
end
|
32
27
|
end
|
data/lib/character_set/{common_sets.rb → predefined_sets.rb}
RENAMED
@@ -1,9 +1,17 @@
|
|
1
1
|
class CharacterSet
|
2
|
-
module CommonSets
|
2
|
+
module PredefinedSets
|
3
3
|
def ascii
|
4
4
|
@ascii ||= from_ranges(0..0x7F).freeze
|
5
5
|
end
|
6
6
|
|
7
|
+
def ascii_alnum
|
8
|
+
@ascii_alnum ||= from_ranges(0x30..0x39, 0x41..0x5A, 0x61..0x7A).freeze
|
9
|
+
end
|
10
|
+
|
11
|
+
def ascii_letters
|
12
|
+
@ascii_letters ||= from_ranges(0x41..0x5A, 0x61..0x7A).freeze
|
13
|
+
end
|
14
|
+
|
7
15
|
# basic multilingual plane
|
8
16
|
def bmp
|
9
17
|
@bmp ||= from_ranges(0..0xD7FF, 0xE000..0xFFFF).freeze
|
data/lib/character_set/pure.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: range_compressor
|
@@ -86,14 +86,14 @@ dependencies:
|
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '1.
|
89
|
+
version: '1.1'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '1.
|
96
|
+
version: '1.1'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
98
|
name: regexp_property_values
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -134,6 +134,7 @@ files:
|
|
134
134
|
- ".rspec"
|
135
135
|
- ".travis.yml"
|
136
136
|
- BENCHMARK.md
|
137
|
+
- CHANGELOG.md
|
137
138
|
- Gemfile
|
138
139
|
- LICENSE.txt
|
139
140
|
- README.md
|
@@ -151,12 +152,12 @@ files:
|
|
151
152
|
- ext/character_set/unicode_casefold_table.h
|
152
153
|
- lib/character_set.rb
|
153
154
|
- lib/character_set/character.rb
|
154
|
-
- lib/character_set/common_sets.rb
|
155
155
|
- lib/character_set/core_ext.rb
|
156
156
|
- lib/character_set/core_ext/regexp_ext.rb
|
157
157
|
- lib/character_set/core_ext/string_ext.rb
|
158
158
|
- lib/character_set/expression_converter.rb
|
159
159
|
- lib/character_set/parser.rb
|
160
|
+
- lib/character_set/predefined_sets.rb
|
160
161
|
- lib/character_set/pure.rb
|
161
162
|
- lib/character_set/ruby_fallback.rb
|
162
163
|
- lib/character_set/ruby_fallback/character_set_methods.rb
|
@@ -186,7 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
186
187
|
version: '0'
|
187
188
|
requirements: []
|
188
189
|
rubyforge_project:
|
189
|
-
rubygems_version: 2.
|
190
|
+
rubygems_version: 2.7.6
|
190
191
|
signing_key:
|
191
192
|
specification_version: 4
|
192
193
|
summary: Build, read, write and compare sets of Unicode codepoints.
|