regexp_property_values 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c3b4e58dbefa036bfca55b36da86baa96e1ba6c56483167279c9ce971823b8ba
4
- data.tar.gz: 5e5ed54fef70679475b4abaf35abe7e65a8d28940cde6f07398378b5eedf3e03
3
+ metadata.gz: '0588e959f6bc0ad3ef9612a495cbd3485b079b45ed9c4a485a3bd533e3ee9c7a'
4
+ data.tar.gz: abf53c421d3c895e436e0a992236d1daccda73d1da15b73ca5a3f8a8b7f94724
5
5
  SHA512:
6
- metadata.gz: d650720818f1e427b0cb4532e8e93cc348d7680c5a3c0c11e5cfba273802c32889567b0e2aff6e84247c98e60dc97022a47cb4987ed812a3d787bbf9ca98a3a0
7
- data.tar.gz: 10232496849f607db55974e99a998fd6b9f4b2efe459e0fabe03db46ccfe9f41414f1b3028f39ba27101cc17eb0afdb32cb7326d87f262c7739222a958c7e327
6
+ metadata.gz: 769d89728bcf42382782b0b42d946de6af689108b0d0b6101092eacc209f2bf50787fb1290f90545a406677bfc4da270a846de8a0dab0ab5fe4f694b53d423c9
7
+ data.tar.gz: 982ee2f8811bf1989f3789b0a52b83c89610aa7a2d00d190e36a16d16eff2a47f2ba38881373b36ac771f3f294c235207ce460d8a26ac9aee120442a3e7fe07b
data/CHANGELOG.md CHANGED
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [1.5.2] - 2024-05-20
8
+
9
+ ### Fixed
10
+ - fixed `#matched_characters` for properties containing surrogates
11
+ - improved speed of `#matched_codepoints`
12
+
7
13
  ## [1.5.1] - 2024-01-06
8
14
 
9
15
  ### Fixed
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in regexp_property_values.gemspec
6
6
  gemspec
7
7
 
8
- gem 'character_set', '~> 1.4.0'
8
+ gem 'character_set', '~> 1.8.0'
9
9
  gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'range_compressor', '~> 1.0'
data/README.md CHANGED
@@ -34,6 +34,8 @@ PV['foobar'].supported_by_current_ruby? # => false
34
34
  PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
35
35
  PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
36
36
  PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
37
+ # Note: #matched_characters is slow for large properties and you
38
+ # may not want to use it in time-critical code. It also omits surrogates.
37
39
 
38
40
  PV['foobar'].matched_ranges # => RegexpPropertyValues::Error
39
41
  ```
data/bin/console CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "bundler/setup"
4
4
  require "regexp_property_values"
5
+ require "benchmark"
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
data/ext/regexp_property_values/regexp_property_values.c CHANGED
@@ -2,11 +2,13 @@
2
2
  #include "ruby/encoding.h"
3
3
  #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
4
4
 
5
- static int prop_name_to_ctype(char *name, rb_encoding *enc)
5
+ static int prop_name_to_ctype(VALUE arg, rb_encoding *enc)
6
6
  {
7
+ char *name;
7
8
  UChar *uname;
8
9
  int ctype;
9
10
 
11
+ name = StringValueCStr(arg);
10
12
  uname = (UChar *)name;
11
13
  ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name));
12
14
  if (ctype < 0)
@@ -15,13 +17,26 @@ static int prop_name_to_ctype(char *name, rb_encoding *enc)
15
17
  return ctype;
16
18
  }
17
19
 
18
- VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
20
+ const OnigCodePoint *get_onig_ranges(VALUE prop_name)
21
+ {
22
+ int ctype;
23
+ const OnigCodePoint *ranges;
24
+ OnigCodePoint sb_out;
25
+ rb_encoding *enc;
26
+
27
+ enc = rb_utf8_encoding();
28
+ ctype = prop_name_to_ctype(prop_name, enc);
29
+ ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
30
+ return ranges;
31
+ }
32
+
33
+ VALUE onig_ranges_to_rb_ranges(const OnigCodePoint *onig_ranges)
19
34
  {
20
35
  unsigned int range_count, i;
21
36
  VALUE result, sub_range;
22
37
 
23
38
  range_count = onig_ranges[0];
24
- result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0
39
+ result = rb_ary_new_capa(range_count);
25
40
 
26
41
  for (i = 0; i < range_count; i++)
27
42
  {
@@ -34,24 +49,35 @@ VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
34
49
  return result;
35
50
  }
36
51
 
37
- VALUE rb_prop_ranges(char *name)
52
+ VALUE onig_ranges_to_rb_integers(const OnigCodePoint *onig_ranges)
38
53
  {
39
- int ctype;
40
- const OnigCodePoint *onig_ranges;
41
- OnigCodePoint sb_out;
42
- rb_encoding *enc;
43
- enc = rb_utf8_encoding();
54
+ unsigned int range_count, i, beg, end, j;
55
+ VALUE result;
56
+
57
+ range_count = onig_ranges[0];
58
+ result = rb_ary_new();
44
59
 
45
- ctype = prop_name_to_ctype(name, enc);
46
- ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
47
- return onig_ranges_to_rb(onig_ranges);
60
+ for (i = 0; i < range_count; i++)
61
+ {
62
+ beg = onig_ranges[(i * 2) + 1];
63
+ end = onig_ranges[(i * 2) + 2];
64
+ for (j = beg; j <= end; j++)
65
+ {
66
+ rb_ary_push(result, INT2FIX(j));
67
+ }
68
+ }
69
+
70
+ return result;
48
71
  }
49
72
 
50
73
  VALUE method_matched_ranges(VALUE self, VALUE arg)
51
74
  {
52
- char *prop_name;
53
- prop_name = StringValueCStr(arg);
54
- return rb_prop_ranges(prop_name);
75
+ return onig_ranges_to_rb_ranges(get_onig_ranges(arg));
76
+ }
77
+
78
+ VALUE method_matched_codepoints(VALUE self, VALUE arg)
79
+ {
80
+ return onig_ranges_to_rb_integers(get_onig_ranges(arg));
55
81
  }
56
82
 
57
83
  void Init_regexp_property_values()
@@ -63,4 +89,5 @@ void Init_regexp_property_values()
63
89
  VALUE module;
64
90
  module = rb_define_module("OnigRegexpPropertyHelper");
65
91
  rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
92
+ rb_define_singleton_method(module, "matched_codepoints", method_matched_codepoints, 1);
66
93
  }
data/lib/regexp_property_values/value/ext_adapter.rb CHANGED
@@ -2,11 +2,17 @@ module RegexpPropertyValues
2
2
  class Value
3
3
  module ExtAdapter
4
4
  def matched_characters
5
- matched_codepoints.map { |cp| cp.chr('utf-8') }
5
+ acc = []
6
+ matched_codepoints.each do |cp|
7
+ acc << cp.chr('utf-8') if cp < 0xD800 || cp > 0xDFFF
8
+ end
9
+ acc
6
10
  end
7
11
 
8
12
  def matched_codepoints
9
- matched_ranges.flat_map(&:to_a)
13
+ OnigRegexpPropertyHelper.matched_codepoints(name)
14
+ rescue ArgumentError
15
+ raise_unsupported_or_unknown_error
10
16
  end
11
17
 
12
18
  def matched_ranges
data/lib/regexp_property_values/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module RegexpPropertyValues
2
- VERSION = '1.5.1'
2
+ VERSION = '1.5.2'
3
3
  end
data/regexp_property_values.gemspec CHANGED
@@ -22,5 +22,5 @@ Gem::Specification.new do |s|
22
22
 
23
23
  s.extensions = %w[ext/regexp_property_values/extconf.rb]
24
24
 
25
- s.required_ruby_version = '>= 2.0.0'
25
+ s.required_ruby_version = '>= 2.1.0'
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_property_values
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-06 00:00:00.000000000 Z
11
+ date: 2024-05-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This small library lets you see which property values are supported by
14
14
  the regular expression engine of the Ruby version you are running, and what they
@@ -54,14 +54,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
54
54
  requirements:
55
55
  - - ">="
56
56
  - !ruby/object:Gem::Version
57
- version: 2.0.0
57
+ version: 2.1.0
58
58
  required_rubygems_version: !ruby/object:Gem::Requirement
59
59
  requirements:
60
60
  - - ">="
61
61
  - !ruby/object:Gem::Version
62
62
  version: '0'
63
63
  requirements: []
64
- rubygems_version: 3.5.0.dev
64
+ rubygems_version: 3.5.3
65
65
  signing_key:
66
66
  specification_version: 4
67
67
  summary: Inspect property values supported by Ruby's regex engine