regexp_property_values 1.5.1-java → 1.5.2-java

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4518acf00c9367623e0e052c08972c13801ad749dd1bf4f7df8341cad28a639
4
- data.tar.gz: 3d557814376809cede7762b4680ce043714463f3857e04ba8d66d29cfbb212f3
3
+ metadata.gz: 90678821cd4aa4d97e9354fa51582ac7e23c57c7f84f897338200ba9b17ac67d
4
+ data.tar.gz: b8d8f3a29eae253c8a58d9f8e3ec7a481c6f6007be83602fec122ab27b472646
5
5
  SHA512:
6
- metadata.gz: 15d768da9c13f1cd45c871a0eb63b674dc5f86d46a95cd31fe7b3b65a998dbd397fae9af02d14ea6256db2d019020bb6feb277e0b1875f9b20ddac6ace5b20a8
7
- data.tar.gz: 47778cf3fbf16427c6c5b27f7ab6a91431227cdae0c2b53ace0e6b0158018a04947975f52895ea985356d988cd7f15784e75d0e289fcbf2e880e07be7c2117be
6
+ metadata.gz: 2f43330b2f6d16d9ec879b128909477e2fd5e617ae96266f36fd1373c7795af1912d7b88af7c998b87d5df41449855b1b41c5cd3821f3bffd6c0331a3c14db77
7
+ data.tar.gz: ec20a5e4bbd837d3f5be01333fa0c5969549ad90d55ff50ad5545fcc86383969f7b9e018b5ef33d882e8d0818e904599f089ad3919e4c13774ea404e261f7b20
data/CHANGELOG.md CHANGED
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [1.5.2] - 2024-05-20
8
+
9
+ ### Fixed
10
+ - fixed `#matched_characters` for properties containing surrogates
11
+ - improved speed of `#matched_codepoints`
12
+
7
13
  ## [1.5.1] - 2024-01-06
8
14
 
9
15
  ### Fixed
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in regexp_property_values.gemspec
6
6
  gemspec
7
7
 
8
- gem 'character_set', '~> 1.4.0'
8
+ gem 'character_set', '~> 1.8.0'
9
9
  gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'range_compressor', '~> 1.0'
data/README.md CHANGED
@@ -34,6 +34,8 @@ PV['foobar'].supported_by_current_ruby? # => false
34
34
  PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
35
35
  PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
36
36
  PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
37
+ # Note: #matched_characters is slow for large properties and you
38
+ # may not want to use it in time-critical code. It also omits surrogates.
37
39
 
38
40
  PV['foobar'].matched_ranges # => RegexpPropertyValues::Error
39
41
  ```
data/bin/console CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "bundler/setup"
4
4
  require "regexp_property_values"
5
+ require "benchmark"
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -2,11 +2,13 @@
2
2
  #include "ruby/encoding.h"
3
3
  #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
4
4
 
5
- static int prop_name_to_ctype(char *name, rb_encoding *enc)
5
+ static int prop_name_to_ctype(VALUE arg, rb_encoding *enc)
6
6
  {
7
+ char *name;
7
8
  UChar *uname;
8
9
  int ctype;
9
10
 
11
+ name = StringValueCStr(arg);
10
12
  uname = (UChar *)name;
11
13
  ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name));
12
14
  if (ctype < 0)
@@ -15,13 +17,26 @@ static int prop_name_to_ctype(char *name, rb_encoding *enc)
15
17
  return ctype;
16
18
  }
17
19
 
18
- VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
20
+ const OnigCodePoint *get_onig_ranges(VALUE prop_name)
21
+ {
22
+ int ctype;
23
+ const OnigCodePoint *ranges;
24
+ OnigCodePoint sb_out;
25
+ rb_encoding *enc;
26
+
27
+ enc = rb_utf8_encoding();
28
+ ctype = prop_name_to_ctype(prop_name, enc);
29
+ ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
30
+ return ranges;
31
+ }
32
+
33
+ VALUE onig_ranges_to_rb_ranges(const OnigCodePoint *onig_ranges)
19
34
  {
20
35
  unsigned int range_count, i;
21
36
  VALUE result, sub_range;
22
37
 
23
38
  range_count = onig_ranges[0];
24
- result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0
39
+ result = rb_ary_new_capa(range_count);
25
40
 
26
41
  for (i = 0; i < range_count; i++)
27
42
  {
@@ -34,24 +49,35 @@ VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
34
49
  return result;
35
50
  }
36
51
 
37
- VALUE rb_prop_ranges(char *name)
52
+ VALUE onig_ranges_to_rb_integers(const OnigCodePoint *onig_ranges)
38
53
  {
39
- int ctype;
40
- const OnigCodePoint *onig_ranges;
41
- OnigCodePoint sb_out;
42
- rb_encoding *enc;
43
- enc = rb_utf8_encoding();
54
+ unsigned int range_count, i, beg, end, j;
55
+ VALUE result;
56
+
57
+ range_count = onig_ranges[0];
58
+ result = rb_ary_new();
44
59
 
45
- ctype = prop_name_to_ctype(name, enc);
46
- ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
47
- return onig_ranges_to_rb(onig_ranges);
60
+ for (i = 0; i < range_count; i++)
61
+ {
62
+ beg = onig_ranges[(i * 2) + 1];
63
+ end = onig_ranges[(i * 2) + 2];
64
+ for (j = beg; j <= end; j++)
65
+ {
66
+ rb_ary_push(result, INT2FIX(j));
67
+ }
68
+ }
69
+
70
+ return result;
48
71
  }
49
72
 
50
73
  VALUE method_matched_ranges(VALUE self, VALUE arg)
51
74
  {
52
- char *prop_name;
53
- prop_name = StringValueCStr(arg);
54
- return rb_prop_ranges(prop_name);
75
+ return onig_ranges_to_rb_ranges(get_onig_ranges(arg));
76
+ }
77
+
78
+ VALUE method_matched_codepoints(VALUE self, VALUE arg)
79
+ {
80
+ return onig_ranges_to_rb_integers(get_onig_ranges(arg));
55
81
  }
56
82
 
57
83
  void Init_regexp_property_values()
@@ -63,4 +89,5 @@ void Init_regexp_property_values()
63
89
  VALUE module;
64
90
  module = rb_define_module("OnigRegexpPropertyHelper");
65
91
  rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
92
+ rb_define_singleton_method(module, "matched_codepoints", method_matched_codepoints, 1);
66
93
  }
@@ -2,11 +2,17 @@ module RegexpPropertyValues
2
2
  class Value
3
3
  module ExtAdapter
4
4
  def matched_characters
5
- matched_codepoints.map { |cp| cp.chr('utf-8') }
5
+ acc = []
6
+ matched_codepoints.each do |cp|
7
+ acc << cp.chr('utf-8') if cp < 0xD800 || cp > 0xDFFF
8
+ end
9
+ acc
6
10
  end
7
11
 
8
12
  def matched_codepoints
9
- matched_ranges.flat_map(&:to_a)
13
+ OnigRegexpPropertyHelper.matched_codepoints(name)
14
+ rescue ArgumentError
15
+ raise_unsupported_or_unknown_error
10
16
  end
11
17
 
12
18
  def matched_ranges
@@ -1,3 +1,3 @@
1
1
  module RegexpPropertyValues
2
- VERSION = '1.5.1'
2
+ VERSION = '1.5.2'
3
3
  end
@@ -22,5 +22,5 @@ Gem::Specification.new do |s|
22
22
 
23
23
  s.extensions = %w[ext/regexp_property_values/extconf.rb]
24
24
 
25
- s.required_ruby_version = '>= 2.0.0'
25
+ s.required_ruby_version = '>= 2.1.0'
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_property_values
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: java
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-06 00:00:00.000000000 Z
11
+ date: 2024-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_compressor
@@ -67,14 +67,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - ">="
69
69
  - !ruby/object:Gem::Version
70
- version: 2.0.0
70
+ version: 2.1.0
71
71
  required_rubygems_version: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  requirements: []
77
- rubygems_version: 3.5.0.dev
77
+ rubygems_version: 3.5.3
78
78
  signing_key:
79
79
  specification_version: 4
80
80
  summary: Inspect property values supported by Ruby's regex engine