regexp_property_values 1.5.1-java → 1.5.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f4518acf00c9367623e0e052c08972c13801ad749dd1bf4f7df8341cad28a639
4
- data.tar.gz: 3d557814376809cede7762b4680ce043714463f3857e04ba8d66d29cfbb212f3
3
+ metadata.gz: 90678821cd4aa4d97e9354fa51582ac7e23c57c7f84f897338200ba9b17ac67d
4
+ data.tar.gz: b8d8f3a29eae253c8a58d9f8e3ec7a481c6f6007be83602fec122ab27b472646
5
5
  SHA512:
6
- metadata.gz: 15d768da9c13f1cd45c871a0eb63b674dc5f86d46a95cd31fe7b3b65a998dbd397fae9af02d14ea6256db2d019020bb6feb277e0b1875f9b20ddac6ace5b20a8
7
- data.tar.gz: 47778cf3fbf16427c6c5b27f7ab6a91431227cdae0c2b53ace0e6b0158018a04947975f52895ea985356d988cd7f15784e75d0e289fcbf2e880e07be7c2117be
6
+ metadata.gz: 2f43330b2f6d16d9ec879b128909477e2fd5e617ae96266f36fd1373c7795af1912d7b88af7c998b87d5df41449855b1b41c5cd3821f3bffd6c0331a3c14db77
7
+ data.tar.gz: ec20a5e4bbd837d3f5be01333fa0c5969549ad90d55ff50ad5545fcc86383969f7b9e018b5ef33d882e8d0818e904599f089ad3919e4c13774ea404e261f7b20
data/CHANGELOG.md CHANGED
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
4
4
  The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
5
5
  and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
6
6
 
7
+ ## [1.5.2] - 2024-05-20
8
+
9
+ ### Fixed
10
+ - fixed `#matched_characters` for properties containing surrogates
11
+ - improved speed of `#matched_codepoints`
12
+
7
13
  ## [1.5.1] - 2024-01-06
8
14
 
9
15
  ### Fixed
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
5
5
  # Specify your gem's dependencies in regexp_property_values.gemspec
6
6
  gemspec
7
7
 
8
- gem 'character_set', '~> 1.4.0'
8
+ gem 'character_set', '~> 1.8.0'
9
9
  gem 'rake', '~> 13.0'
10
10
  gem 'rake-compiler', '~> 1.0'
11
11
  gem 'range_compressor', '~> 1.0'
data/README.md CHANGED
@@ -34,6 +34,8 @@ PV['foobar'].supported_by_current_ruby? # => false
34
34
  PV['AHex'].matched_characters # => %w[0 1 2 3 4 5 6 7 8 9 A B C ...]
35
35
  PV['AHex'].matched_codepoints # => [48, 49, 50, ...]
36
36
  PV['AHex'].matched_ranges # => [48..57, 65..70, 97..102]
37
+ # Note: #matched_characters is slow for large properties and you
38
+ # may not want to use it in time-critical code. It also omits surrogates.
37
39
 
38
40
  PV['foobar'].matched_ranges # => RegexpPropertyValues::Error
39
41
  ```
data/bin/console CHANGED
@@ -2,6 +2,7 @@
2
2
 
3
3
  require "bundler/setup"
4
4
  require "regexp_property_values"
5
+ require "benchmark"
5
6
 
6
7
  # You can add fixtures and/or initialization code here to make experimenting
7
8
  # with your gem easier. You can also use a different console, if you like.
@@ -2,11 +2,13 @@
2
2
  #include "ruby/encoding.h"
3
3
  #include "ruby/oniguruma.h" // still in recent rubies f. backwards compatibility
4
4
 
5
- static int prop_name_to_ctype(char *name, rb_encoding *enc)
5
+ static int prop_name_to_ctype(VALUE arg, rb_encoding *enc)
6
6
  {
7
+ char *name;
7
8
  UChar *uname;
8
9
  int ctype;
9
10
 
11
+ name = StringValueCStr(arg);
10
12
  uname = (UChar *)name;
11
13
  ctype = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, uname, uname + strlen(name));
12
14
  if (ctype < 0)
@@ -15,13 +17,26 @@ static int prop_name_to_ctype(char *name, rb_encoding *enc)
15
17
  return ctype;
16
18
  }
17
19
 
18
- VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
20
+ const OnigCodePoint *get_onig_ranges(VALUE prop_name)
21
+ {
22
+ int ctype;
23
+ const OnigCodePoint *ranges;
24
+ OnigCodePoint sb_out;
25
+ rb_encoding *enc;
26
+
27
+ enc = rb_utf8_encoding();
28
+ ctype = prop_name_to_ctype(prop_name, enc);
29
+ ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
30
+ return ranges;
31
+ }
32
+
33
+ VALUE onig_ranges_to_rb_ranges(const OnigCodePoint *onig_ranges)
19
34
  {
20
35
  unsigned int range_count, i;
21
36
  VALUE result, sub_range;
22
37
 
23
38
  range_count = onig_ranges[0];
24
- result = rb_ary_new2(range_count); // rb_ary_new_capa not avail. in Ruby 2.0
39
+ result = rb_ary_new_capa(range_count);
25
40
 
26
41
  for (i = 0; i < range_count; i++)
27
42
  {
@@ -34,24 +49,35 @@ VALUE onig_ranges_to_rb(const OnigCodePoint *onig_ranges)
34
49
  return result;
35
50
  }
36
51
 
37
- VALUE rb_prop_ranges(char *name)
52
+ VALUE onig_ranges_to_rb_integers(const OnigCodePoint *onig_ranges)
38
53
  {
39
- int ctype;
40
- const OnigCodePoint *onig_ranges;
41
- OnigCodePoint sb_out;
42
- rb_encoding *enc;
43
- enc = rb_utf8_encoding();
54
+ unsigned int range_count, i, beg, end, j;
55
+ VALUE result;
56
+
57
+ range_count = onig_ranges[0];
58
+ result = rb_ary_new();
44
59
 
45
- ctype = prop_name_to_ctype(name, enc);
46
- ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &onig_ranges);
47
- return onig_ranges_to_rb(onig_ranges);
60
+ for (i = 0; i < range_count; i++)
61
+ {
62
+ beg = onig_ranges[(i * 2) + 1];
63
+ end = onig_ranges[(i * 2) + 2];
64
+ for (j = beg; j <= end; j++)
65
+ {
66
+ rb_ary_push(result, INT2FIX(j));
67
+ }
68
+ }
69
+
70
+ return result;
48
71
  }
49
72
 
50
73
  VALUE method_matched_ranges(VALUE self, VALUE arg)
51
74
  {
52
- char *prop_name;
53
- prop_name = StringValueCStr(arg);
54
- return rb_prop_ranges(prop_name);
75
+ return onig_ranges_to_rb_ranges(get_onig_ranges(arg));
76
+ }
77
+
78
+ VALUE method_matched_codepoints(VALUE self, VALUE arg)
79
+ {
80
+ return onig_ranges_to_rb_integers(get_onig_ranges(arg));
55
81
  }
56
82
 
57
83
  void Init_regexp_property_values()
@@ -63,4 +89,5 @@ void Init_regexp_property_values()
63
89
  VALUE module;
64
90
  module = rb_define_module("OnigRegexpPropertyHelper");
65
91
  rb_define_singleton_method(module, "matched_ranges", method_matched_ranges, 1);
92
+ rb_define_singleton_method(module, "matched_codepoints", method_matched_codepoints, 1);
66
93
  }
@@ -2,11 +2,17 @@ module RegexpPropertyValues
2
2
  class Value
3
3
  module ExtAdapter
4
4
  def matched_characters
5
- matched_codepoints.map { |cp| cp.chr('utf-8') }
5
+ acc = []
6
+ matched_codepoints.each do |cp|
7
+ acc << cp.chr('utf-8') if cp < 0xD800 || cp > 0xDFFF
8
+ end
9
+ acc
6
10
  end
7
11
 
8
12
  def matched_codepoints
9
- matched_ranges.flat_map(&:to_a)
13
+ OnigRegexpPropertyHelper.matched_codepoints(name)
14
+ rescue ArgumentError
15
+ raise_unsupported_or_unknown_error
10
16
  end
11
17
 
12
18
  def matched_ranges
@@ -1,3 +1,3 @@
1
1
  module RegexpPropertyValues
2
- VERSION = '1.5.1'
2
+ VERSION = '1.5.2'
3
3
  end
@@ -22,5 +22,5 @@ Gem::Specification.new do |s|
22
22
 
23
23
  s.extensions = %w[ext/regexp_property_values/extconf.rb]
24
24
 
25
- s.required_ruby_version = '>= 2.0.0'
25
+ s.required_ruby_version = '>= 2.1.0'
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp_property_values
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.5.2
5
5
  platform: java
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-06 00:00:00.000000000 Z
11
+ date: 2024-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: range_compressor
@@ -67,14 +67,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
67
67
  requirements:
68
68
  - - ">="
69
69
  - !ruby/object:Gem::Version
70
- version: 2.0.0
70
+ version: 2.1.0
71
71
  required_rubygems_version: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0'
76
76
  requirements: []
77
- rubygems_version: 3.5.0.dev
77
+ rubygems_version: 3.5.3
78
78
  signing_key:
79
79
  specification_version: 4
80
80
  summary: Inspect property values supported by Ruby's regex engine