rust_regexp 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b40dce5345e9d2dddf0899c5967ed4804c7ed37ff9c7c9dabd7fd0071690fcc1
4
- data.tar.gz: ea50b0251c8c8b1be2fc0ebe2bd368185e7f01107d0747ac694b615565cf53e0
3
+ metadata.gz: 3fa462b537e9c799b5939981d347a240e31d6cf982cee50d3c63dff6f2fbc98b
4
+ data.tar.gz: bf19b0f1a052b8961645f216e4b2ce9ee4168f2447311752e1c7550f57f64286
5
5
  SHA512:
6
- metadata.gz: 7a7307c4f68f3e56a2574ec531610f410681df01af682cf04592f6a9393318e5c820eed11c9fb90009ea0a5e08e4dfeb5a07be6f7a8c513809ac8000fecfcd50
7
- data.tar.gz: 17565b8d2ee5f841fd4cb10d4d37c4feedfd3bd40db8c40a2437287f558abc213de5c67f586e5771c0830153d41102f436d831655056a52d79d31214fcaf3c2f
6
+ metadata.gz: 4cf864254f850c217dfb4c74a861c59748de044306a43a702c9c4fc677fa5a2afef9e9231a24e47f6bbce6ee6ae382f370463fafa8782e290e15d24728d1c932
7
+ data.tar.gz: 3e8f831f749e1ff403d4b72b5e49ad07ae7603a878086cd192cbe396760cb3682a02715f43cdcce49a4ef2a545255225ab8db33af91c9de17e89f23e53327c82
data/README.md CHANGED
@@ -114,6 +114,28 @@ RustRegexp::Set.new(["abc", "def"]).patterns
114
114
  # => ["abc", "def"]
115
115
  ```
116
116
 
117
+ ## Encoding
118
+
119
+ Currently, `rust_regexp` expects the haystack to be a UTF-8 string.
120
+
121
+ It also supports parsing of strings with invalid UTF-8 characters by default. This is achieved by using `regex::bytes` instead of plain `regex` under the hood, so any byte sequence can be matched. The output match is encoded as a UTF-8 string.
122
+
123
+ If Unicode awareness of matchers should be disabled, both `RustRegexp` and `RustRegexp::Set` support the `unicode: false` option:
124
+
125
+ ```ruby
126
+ RustRegexp.new('\w+').match('ю٤夏')
127
+ # => ["ю٤夏"]
128
+
129
+ RustRegexp.new('\w+', unicode: false).match('ю٤夏')
130
+ # => []
131
+
132
+ RustRegexp::Set.new(['\w', '\d', '\s']).match("ю٤\u2000")
133
+ # => [0, 1, 2]
134
+
135
+ RustRegexp::Set.new(['\w', '\d', '\s'], unicode: false).match("ю٤\u2000")
136
+ # => []
137
+ ```
138
+
117
139
  ## Development
118
140
 
119
141
  ```sh
@@ -1,28 +1,29 @@
1
1
  use magnus::{
2
- class,
3
- define_class,
2
+ class, define_class,
4
3
  encoding::RbEncoding,
5
- exception,
6
- function,
7
- method,
8
- prelude::*,
9
- scan_args::scan_args,
10
- Value,
11
- Error,
12
- RString,
13
- RArray,
4
+ exception, function, method,
5
+ scan_args::{get_kwargs, scan_args},
6
+ Error, Module, Object, RArray, RHash, RString, Value,
14
7
  };
15
- use regex::bytes::{Regex, RegexSet, Match};
8
+ use regex::bytes::{Match, Regex, RegexBuilder, RegexSet, RegexSetBuilder};
16
9
 
17
10
  #[magnus::wrap(class = "RustRegexp", free_immediately, size)]
18
11
  pub struct RustRegexp(Regex);
19
12
 
20
13
  impl RustRegexp {
21
14
  pub fn new(args: &[Value]) -> Result<Self, Error> {
22
- let args = scan_args::<(String,), (), (), (), (), ()>(args)?;
15
+ let args = scan_args::<(String,), (), (), (), RHash, ()>(args)?;
16
+ let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
17
+
23
18
  let pattern = args.required.0;
19
+ let (unicode,) = kwargs.optional;
20
+ let unicode = unicode.unwrap_or_else(|| true);
24
21
 
25
- let regex = Regex::new(&pattern).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
22
+ let mut builder = RegexBuilder::new(&pattern);
23
+ let regex = builder
24
+ .unicode(unicode)
25
+ .build()
26
+ .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
26
27
 
27
28
  Ok(Self(regex))
28
29
  }
@@ -90,9 +91,7 @@ impl RustRegexp {
90
91
  }
91
92
  }
92
93
 
93
- result
94
- .push(group)
95
- .expect("Non-frozen array");
94
+ result.push(group).expect("Non-frozen array");
96
95
  }
97
96
  }
98
97
 
@@ -113,10 +112,7 @@ impl RustRegexp {
113
112
  }
114
113
 
115
114
  fn capture_to_ruby_string(capture: &Match) -> RString {
116
- RString::enc_new(
117
- capture.as_bytes(),
118
- RbEncoding::utf8()
119
- )
115
+ RString::enc_new(capture.as_bytes(), RbEncoding::utf8())
120
116
  }
121
117
  }
122
118
 
@@ -125,10 +121,18 @@ pub struct RustRegexpSet(RegexSet);
125
121
 
126
122
  impl RustRegexpSet {
127
123
  pub fn new(args: &[Value]) -> Result<Self, Error> {
128
- let args = scan_args::<(Vec<String>,), (), (), (), (), ()>(args)?;
124
+ let args = scan_args::<(Vec<String>,), (), (), (), RHash, ()>(args)?;
125
+ let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
126
+
129
127
  let patterns = args.required.0;
128
+ let (unicode,) = kwargs.optional;
129
+ let unicode = unicode.unwrap_or_else(|| true);
130
130
 
131
- let set = RegexSet::new(patterns).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
131
+ let mut builder = RegexSetBuilder::new(patterns);
132
+ let set = builder
133
+ .unicode(unicode)
134
+ .build()
135
+ .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
132
136
 
133
137
  Ok(Self(set))
134
138
  }
@@ -137,7 +141,7 @@ impl RustRegexpSet {
137
141
  let set = &self.0;
138
142
  let haystack = unsafe { haystack.as_slice() };
139
143
 
140
- set.matches(haystack).into_iter().collect()
144
+ set.matches(haystack).iter().collect()
141
145
  }
142
146
 
143
147
  pub fn is_match(&self, haystack: RString) -> bool {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RustRegexp
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust_regexp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmytro Horoshko