character_set 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d2e4067480e00d5d03db2bbd1ee4f222f936e0f2
4
- data.tar.gz: 0e4c0bc6cf393b1a81dc368ee86f94d0dea10a82
2
+ SHA256:
3
+ metadata.gz: e44c152c023f9b427cf8cf28f78bd77b611ce0dea7d480683dc56a834d022ce7
4
+ data.tar.gz: d0c2416e610779e44f3d2f1e961c03f2315da6f141b200ddfc48ed34742d1c8b
5
5
  SHA512:
6
- metadata.gz: d9150168393512190a496ed10af91a1eaa49eb2a01d3fb623de9586eb4fbd354dfea172bf6174ab180f6620ae6ca13a01f94ec26a95fbf118f48f611b4d7acd7
7
- data.tar.gz: cb4b067fae5c8a550267a0dcef7708b30d36598b2ed18981711ad9b4a67b23cbf444270f7006d160e50f151ba32fe3402108429d415f7adbfb0be9160fedfda7
6
+ metadata.gz: 13aec8dd0c27b280382b75eec1f18998330fe92711c185af93d34d8414ef5585d5f231148371d7960c6b0646dc7a675afdc500f4c5bfab7aa88a88ec07741a1f
7
+ data.tar.gz: 39eaf06a7fe1c87ee4112693b157f8cea2b2130fa32c82259c3f1625b22807e3e3f3e1fe994055330fbf3a639bc64306b98c8031522e88d548217d08dabebb3b
data/CHANGELOG.md ADDED
@@ -0,0 +1,14 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
+ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [1.1.0] - 2018-09-21
8
+
9
+ ### Added
10
+ - added option to reference a predefined set via Symbol in `String` extension methods
11
+ - added predefined sets `::ascii_alnum` and `::ascii_letters`
12
+
13
+ ## [1.0.0] - 2018-09-02
14
+ Initial release.
data/README.md CHANGED
@@ -13,6 +13,16 @@ Many parts can be used independently, e.g.:
13
13
 
14
14
  ## Usage
15
15
 
16
+ ### Usage examples
17
+
18
+ ```ruby
19
+ CharacterSet.url_query.cover?('?a=(b$c;)') # => true
20
+
21
+ CharacterSet.non_ascii.delete_in!(string)
22
+
23
+ CharacterSet.emoji.sample(5) # => ["⛷", "👈", "🌞", "♑", "⛈"]
24
+ ```
25
+
16
26
  ### Parse/Initialize
17
27
 
18
28
  These all produce a `CharacterSet` containing `a`, `b` and `c`:
@@ -27,41 +37,25 @@ CharacterSet.parse('[a-c]')
27
37
  CharacterSet.parse('\U00000061-\U00000063')
28
38
  ```
29
39
 
30
- If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/janosch-x/regexp_property_values) are installed, `::of_regexp` and `::of_property` can also be used. `::of_regexp` can handle intersections, negations, and set nesting:
40
+ If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/janosch-x/regexp_property_values) are installed, `::of_regexp` and `::of_property` can also be used. `::of_regexp` can handle intersections, negations, and set nesting.
31
41
 
32
42
  ```ruby
33
- # are there any non-digit ascii chars classified as emoji?
34
- set = CharacterSet.of_regexp(/[\D&&[:ascii:]&&\p{emoji}]/)
35
-
36
- # ... of course there are!
37
- set.to_a(stringify: true) # => ["#", "*"]
43
+ CharacterSet.of_property('Thai') # => #<CharacterSet (size: 86)>
38
44
 
39
- # with the core extension:
40
45
  require 'character_set/core_ext/regexp_ext'
41
- /[a-e&&[^c]]/.character_set # => CharacterSet['a', 'b', 'd', 'e']
46
+
47
+ /[\D&&[:ascii:]&&\p{emoji}]/.character_set.size # => 2
42
48
  ```
43
49
 
44
- ### Common utility sets
50
+ ### Predefined utility sets
51
+
52
+ `ascii`, `ascii_alnum`, `ascii_letters`, `bmp`, `crypt`, `emoji`, `newline`, `unicode`, `url_fragment`, `url_host`, `url_path`, `url_query`, `whitespace`
45
53
 
46
54
  ```ruby
47
- CharacterSet.ascii
48
- CharacterSet.bmp
49
- CharacterSet.crypt
50
- CharacterSet.emoji
51
- CharacterSet.newline
52
- CharacterSet.unicode
53
- CharacterSet.url_fragment
54
- CharacterSet.url_host
55
- CharacterSet.url_path
56
- CharacterSet.url_query
57
- CharacterSet.whitespace
58
-
59
- # e.g.
60
- CharacterSet.url_query.cover?('?a=(b$c;)') # => true
61
- CharacterSet.emoji.sample(5) # => ["⛷", "👈", "🌞", "♑", "⛈"]
55
+ CharacterSet.ascii # => #<CharacterSet (size: 128)>
62
56
 
63
57
  # all can be prefixed with `non_`, e.g.
64
- CharacterSet.non_ascii.delete_in(string)
58
+ CharacterSet.non_ascii
65
59
  ```
66
60
 
67
61
  ### Interact with Strings
@@ -95,9 +89,12 @@ There is also a core extension for String interaction.
95
89
  require 'character_set/core_ext/string_ext'
96
90
 
97
91
  "a\rb".character_set & CharacterSet.newline # => CharacterSet["\r"]
98
- "a\rb".uses_character_set?(CharacterSet.emoji) # => false
92
+ "a\rb".uses_character_set?(CharacterSet['ä', 'ö', 'ü']) # => false
99
93
  "a\rb".covered_by_character_set?(CharacterSet.newline) # => false
100
- "a\rb".delete_character_set(CharacterSet.newline) # => 'ab'
94
+
95
+ # predefined sets can also be referenced via Symbols
96
+ "a\rb".covered_by_character_set?(:ascii) # => true
97
+ "a\rb".delete_character_set(:newline) # => 'ab'
101
98
  # etc.
102
99
  ```
103
100
 
data/character_set.gemspec CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |s|
28
28
  s.add_development_dependency 'bundler', '~> 1.16'
29
29
  s.add_development_dependency 'rake', '~> 10.0'
30
30
  s.add_development_dependency 'rake-compiler', '~> 1.0'
31
- s.add_development_dependency 'regexp_parser', '~> 1.0'
31
+ s.add_development_dependency 'regexp_parser', '~> 1.1'
32
32
  s.add_development_dependency 'regexp_property_values', '~> 0.3.2'
33
33
  s.add_development_dependency 'rspec', '~> 3.0'
34
34
  end
data/lib/character_set.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require 'character_set/character'
2
- require 'character_set/common_sets'
3
2
  require 'character_set/expression_converter'
4
3
  require 'character_set/parser'
4
+ require 'character_set/predefined_sets'
5
5
  require 'character_set/set_method_adapters'
6
6
  require 'character_set/shared_methods'
7
7
  require 'character_set/version'
@@ -17,5 +17,5 @@ class CharacterSet
17
17
  prepend SetMethodAdapters
18
18
  include Enumerable
19
19
  include SharedMethods
20
- extend CommonSets
20
+ extend PredefinedSets
21
21
  end
data/lib/character_set/core_ext/string_ext.rb CHANGED
@@ -5,28 +5,23 @@ class CharacterSet
5
5
  CharacterSet.of(self)
6
6
  end
7
7
 
8
- def covered_by_character_set?(set)
9
- set.cover?(self)
10
- end
11
-
12
- def uses_character_set?(set)
13
- set.used_by?(self)
14
- end
15
-
16
- def delete_character_set(set)
17
- set.delete_in(self)
18
- end
19
-
20
- def delete_character_set!(set)
21
- set.delete_in!(self)
22
- end
23
-
24
- def keep_character_set(set)
25
- set.keep_in(self)
26
- end
27
-
28
- def keep_character_set!(set)
29
- set.keep_in!(self)
8
+ {
9
+ covered_by_character_set?: :cover?,
10
+ delete_character_set: :delete_in,
11
+ delete_character_set!: :delete_in!,
12
+ keep_character_set: :keep_in,
13
+ keep_character_set!: :keep_in!,
14
+ uses_character_set?: :used_by?,
15
+ }.each do |string_method, set_method|
16
+ class_eval <<-RUBY, __FILE__, __LINE__ + 1
17
+ def #{string_method}(arg)
18
+ if arg.instance_of?(Symbol)
19
+ CharacterSet.__send__(arg).#{set_method}(self)
20
+ else
21
+ arg.#{set_method}(self)
22
+ end
23
+ end
24
+ RUBY
30
25
  end
31
26
  end
32
27
  end
data/lib/character_set/common_sets.rb → data/lib/character_set/predefined_sets.rb RENAMED
@@ -1,9 +1,17 @@
1
1
  class CharacterSet
2
- module CommonSets
2
+ module PredefinedSets
3
3
  def ascii
4
4
  @ascii ||= from_ranges(0..0x7F).freeze
5
5
  end
6
6
 
7
+ def ascii_alnum
8
+ @ascii_alnum ||= from_ranges(0x30..0x39, 0x41..0x5A, 0x61..0x7A).freeze
9
+ end
10
+
11
+ def ascii_letters
12
+ @ascii_letters ||= from_ranges(0x41..0x5A, 0x61..0x7A).freeze
13
+ end
14
+
7
15
  # basic multilingual plane
8
16
  def bmp
9
17
  @bmp ||= from_ranges(0..0xD7FF, 0xE000..0xFFFF).freeze
data/lib/character_set/pure.rb CHANGED
@@ -8,6 +8,6 @@ class CharacterSet
8
8
  prepend CharacterSet::RubyFallback
9
9
  prepend CharacterSet::SetMethodAdapters
10
10
  include CharacterSet::SharedMethods
11
- extend CharacterSet::CommonSets
11
+ extend CharacterSet::PredefinedSets
12
12
  end
13
13
  end
data/lib/character_set/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.0.0'
2
+ VERSION = '1.1.0'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: character_set
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-02 00:00:00.000000000 Z
11
+ date: 2018-09-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_compressor
@@ -86,14 +86,14 @@ dependencies:
86
86
  requirements:
87
87
  - - "~>"
88
88
  - !ruby/object:Gem::Version
89
- version: '1.0'
89
+ version: '1.1'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
- version: '1.0'
96
+ version: '1.1'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: regexp_property_values
99
99
  requirement: !ruby/object:Gem::Requirement
@@ -134,6 +134,7 @@ files:
134
134
  - ".rspec"
135
135
  - ".travis.yml"
136
136
  - BENCHMARK.md
137
+ - CHANGELOG.md
137
138
  - Gemfile
138
139
  - LICENSE.txt
139
140
  - README.md
@@ -151,12 +152,12 @@ files:
151
152
  - ext/character_set/unicode_casefold_table.h
152
153
  - lib/character_set.rb
153
154
  - lib/character_set/character.rb
154
- - lib/character_set/common_sets.rb
155
155
  - lib/character_set/core_ext.rb
156
156
  - lib/character_set/core_ext/regexp_ext.rb
157
157
  - lib/character_set/core_ext/string_ext.rb
158
158
  - lib/character_set/expression_converter.rb
159
159
  - lib/character_set/parser.rb
160
+ - lib/character_set/predefined_sets.rb
160
161
  - lib/character_set/pure.rb
161
162
  - lib/character_set/ruby_fallback.rb
162
163
  - lib/character_set/ruby_fallback/character_set_methods.rb
@@ -186,7 +187,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
186
187
  version: '0'
187
188
  requirements: []
188
189
  rubyforge_project:
189
- rubygems_version: 2.2.2
190
+ rubygems_version: 2.7.6
190
191
  signing_key:
191
192
  specification_version: 4
192
193
  summary: Build, read, write and compare sets of Unicode codepoints.