rust_regexp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e186368d8d88a70355a0101812a8d8cd1fce54d02bbb910a316db7cf36800cd7
4
- data.tar.gz: 7e08a707cd22e155ecde210a3e99f72eafe45efe5d998b3bdcafd0aab407e7f3
3
+ metadata.gz: 3fa462b537e9c799b5939981d347a240e31d6cf982cee50d3c63dff6f2fbc98b
4
+ data.tar.gz: bf19b0f1a052b8961645f216e4b2ce9ee4168f2447311752e1c7550f57f64286
5
5
  SHA512:
6
- metadata.gz: 71b5dfce5fdc308ff46018f00608804429fa4a58d2d41f439a0add00f5b225669fd957149917be33beb43e1b3f4831ad618d51a9b62491e8cb5562130e762133
7
- data.tar.gz: 71186a8b4fb00a89e53f4a1b736b54fdb3b0a3e3e682f04e96a10effade4704c0c602ebbbd45e150a066da574fc41f8fef7bb78f440e321d7161d486c191aa56
6
+ metadata.gz: 4cf864254f850c217dfb4c74a861c59748de044306a43a702c9c4fc677fa5a2afef9e9231a24e47f6bbce6ee6ae382f370463fafa8782e290e15d24728d1c932
7
+ data.tar.gz: 3e8f831f749e1ff403d4b72b5e49ad07ae7603a878086cd192cbe396760cb3682a02715f43cdcce49a4ef2a545255225ab8db33af91c9de17e89f23e53327c82
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # RustRegexp
2
2
 
3
- Simple bindings for [rust/regex](https://docs.rs/regex/latest/regex/) library.
3
+ [![Gem Version](https://badge.fury.io/rb/rust_regexp.svg)](https://badge.fury.io/rb/rust_regexp)
4
+ [![Test](https://github.com/ocvit/rust_regexp/workflows/CI/badge.svg)](https://github.com/ocvit/rust_regexp/actions)
5
+
6
+ Ruby bindings for [rust/regex](https://docs.rs/regex/latest/regex/) library.
4
7
 
5
8
  ## Installation
6
9
 
@@ -24,45 +27,51 @@ require "rust_regexp"
24
27
 
25
28
  ## Usage
26
29
 
27
- Regular expressions should pre-compiled before use:
30
+ Regular expressions should be pre-compiled before use:
28
31
 
29
32
  ```ruby
30
- re = RustRegexp.new('(\w+):(\d+)')
33
+ re = RustRegexp.new('p.t{2}ern*')
31
34
  # => #<RustRegexp:...>
32
35
  ```
33
36
 
34
37
  > [!TIP]
35
38
  > Note the use of *single quotes* when passing the regular expression as
36
- > a string to `rust_regexp` so that the backslashes aren't interpreted as escapes.
39
+ > a string to `rust/regex` so that the backslashes aren't interpreted as escapes.
37
40
 
38
41
  To find a single match in the haystack:
39
42
 
40
43
  ```ruby
41
- re.match("ruby:123, rust:456")
44
+ RustRegexp.new('\w+:\d+').match("ruby:123, rust:456")
45
+ # => ["ruby:123"]
46
+
47
+ RustRegexp.new('(\w+):(\d+)').match("ruby:123, rust:456")
42
48
  # => ["ruby", "123"]
43
49
  ```
44
50
 
45
51
  To find all matches in the haystack:
46
52
 
47
53
  ```ruby
48
- re.scan("ruby:123, rust:456")
54
+ RustRegexp.new('\w+:\d+').scan("ruby:123, rust:456")
55
+ # => ["ruby:123", "rust:456"]
56
+
57
+ RustRegexp.new('(\w+):(\d+)').scan("ruby:123, rust:456")
49
58
  # => [["ruby", "123"], ["rust", "456"]]
50
59
  ```
51
60
 
52
61
  To check whether there is at least one match in the haystack:
53
62
 
54
63
  ```ruby
55
- re.match?("ruby:123")
64
+ RustRegexp.new('\w+:\d+').match?("ruby:123")
56
65
  # => true
57
66
 
58
- re.match?("ruby")
67
+ RustRegexp.new('\w+:\d+').match?("ruby")
59
68
  # => false
60
69
  ```
61
70
 
62
71
  Inspect original pattern:
63
72
 
64
73
  ```ruby
65
- re.pattern
74
+ RustRegexp.new('\w+:\d+').pattern
66
75
  # => "\\w+:\\d+"
67
76
  ```
68
77
 
@@ -91,18 +100,40 @@ set.match("ghidefabc") # => [0, 1, 2]
91
100
  To check whether at least one pattern from the set matches the haystack:
92
101
 
93
102
  ```ruby
94
- set.match?("abc")
103
+ RustRegexp::Set.new(["abc", "def"]).match?("abc")
95
104
  # => true
96
105
 
97
- set.match?("123")
106
+ RustRegexp::Set.new(["abc", "def"]).match?("123")
98
107
  # => false
99
108
  ```
100
109
 
101
110
  Inspect original patterns:
102
111
 
103
112
  ```ruby
104
- set.patterns
105
- # => ["abc", "def", "ghi", "xyz"]
113
+ RustRegexp::Set.new(["abc", "def"]).patterns
114
+ # => ["abc", "def"]
115
+ ```
116
+
117
+ ## Encoding
118
+
119
+ Currently, `rust_regexp` expects the haystack to be a UTF-8 string.
120
+
121
+ It also supports parsing of strings with invalid UTF-8 characters by default. This is achieved by using `regex::bytes` instead of plain `regex` under the hood, so any byte sequence can be matched. The output match is encoded as a UTF-8 string.
122
+
123
+ If the Unicode awareness of matchers should be disabled, both `RustRegexp` and `RustRegexp::Set` support the `unicode: false` option:
124
+
125
+ ```ruby
126
+ RustRegexp.new('\w+').match('ю٤夏')
127
+ # => ["ю٤夏"]
128
+
129
+ RustRegexp.new('\w+', unicode: false).match('ю٤夏')
130
+ # => []
131
+
132
+ RustRegexp::Set.new(['\w', '\d', '\s']).match("ю٤\u2000")
133
+ # => [0, 1, 2]
134
+
135
+ RustRegexp::Set.new(['\w', '\d', '\s'], unicode: false).match("ю٤\u2000")
136
+ # => []
106
137
  ```
107
138
 
108
139
  ## Development
@@ -1,28 +1,29 @@
1
1
  use magnus::{
2
- class,
3
- define_class,
2
+ class, define_class,
4
3
  encoding::RbEncoding,
5
- exception,
6
- function,
7
- method,
8
- prelude::*,
9
- scan_args::scan_args,
10
- Value,
11
- Error,
12
- RString,
13
- RArray,
4
+ exception, function, method,
5
+ scan_args::{get_kwargs, scan_args},
6
+ Error, Module, Object, RArray, RHash, RString, Value,
14
7
  };
15
- use regex::bytes::{Regex, RegexSet, Match};
8
+ use regex::bytes::{Match, Regex, RegexBuilder, RegexSet, RegexSetBuilder};
16
9
 
17
10
  #[magnus::wrap(class = "RustRegexp", free_immediately, size)]
18
11
  pub struct RustRegexp(Regex);
19
12
 
20
13
  impl RustRegexp {
21
14
  pub fn new(args: &[Value]) -> Result<Self, Error> {
22
- let args = scan_args::<(String,), (), (), (), (), ()>(args)?;
15
+ let args = scan_args::<(String,), (), (), (), RHash, ()>(args)?;
16
+ let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
17
+
23
18
  let pattern = args.required.0;
19
+ let (unicode,) = kwargs.optional;
20
+ let unicode = unicode.unwrap_or_else(|| true);
24
21
 
25
- let regex = Regex::new(&pattern).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
22
+ let mut builder = RegexBuilder::new(&pattern);
23
+ let regex = builder
24
+ .unicode(unicode)
25
+ .build()
26
+ .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
26
27
 
27
28
  Ok(Self(regex))
28
29
  }
@@ -33,6 +34,7 @@ impl RustRegexp {
33
34
  let regex = &self.0;
34
35
  let haystack = unsafe { haystack.as_slice() };
35
36
 
37
+ // no capture groups defined except the default one
36
38
  if regex.captures_len() == 1 {
37
39
  // speed optimization, `.find` is faster than `.captures`
38
40
  if let Some(capture) = regex.find(haystack) {
@@ -65,17 +67,12 @@ impl RustRegexp {
65
67
  let regex = &self.0;
66
68
  let haystack = unsafe { haystack.as_slice() };
67
69
 
70
+ // no capture groups defined except the default one
68
71
  if regex.captures_len() == 1 {
69
72
  // speed optimization, `.find_iter` is faster than `.captures_iter`
70
73
  for capture in regex.find_iter(haystack) {
71
- let group = RArray::with_capacity(1);
72
-
73
- group
74
- .push(Self::capture_to_ruby_string(&capture))
75
- .expect("Non-frozen array");
76
-
77
74
  result
78
- .push(group)
75
+ .push(Self::capture_to_ruby_string(&capture))
79
76
  .expect("Non-frozen array");
80
77
  }
81
78
  } else {
@@ -94,9 +91,7 @@ impl RustRegexp {
94
91
  }
95
92
  }
96
93
 
97
- result
98
- .push(group)
99
- .expect("Non-frozen array");
94
+ result.push(group).expect("Non-frozen array");
100
95
  }
101
96
  }
102
97
 
@@ -117,10 +112,7 @@ impl RustRegexp {
117
112
  }
118
113
 
119
114
  fn capture_to_ruby_string(capture: &Match) -> RString {
120
- RString::enc_new(
121
- capture.as_bytes(),
122
- RbEncoding::utf8()
123
- )
115
+ RString::enc_new(capture.as_bytes(), RbEncoding::utf8())
124
116
  }
125
117
  }
126
118
 
@@ -129,10 +121,18 @@ pub struct RustRegexpSet(RegexSet);
129
121
 
130
122
  impl RustRegexpSet {
131
123
  pub fn new(args: &[Value]) -> Result<Self, Error> {
132
- let args = scan_args::<(Vec<String>,), (), (), (), (), ()>(args)?;
124
+ let args = scan_args::<(Vec<String>,), (), (), (), RHash, ()>(args)?;
125
+ let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
126
+
133
127
  let patterns = args.required.0;
128
+ let (unicode,) = kwargs.optional;
129
+ let unicode = unicode.unwrap_or_else(|| true);
134
130
 
135
- let set = RegexSet::new(patterns).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
131
+ let mut builder = RegexSetBuilder::new(patterns);
132
+ let set = builder
133
+ .unicode(unicode)
134
+ .build()
135
+ .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
136
136
 
137
137
  Ok(Self(set))
138
138
  }
@@ -141,7 +141,7 @@ impl RustRegexpSet {
141
141
  let set = &self.0;
142
142
  let haystack = unsafe { haystack.as_slice() };
143
143
 
144
- set.matches(haystack).into_iter().collect()
144
+ set.matches(haystack).iter().collect()
145
145
  }
146
146
 
147
147
  pub fn is_match(&self, haystack: RString) -> bool {
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RustRegexp
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.2"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rust_regexp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dmytro Horoshko
@@ -9,7 +9,7 @@ bindir: exe
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
- description: Simple bindings to rust/regex library.
12
+ description: Simple bindings for rust/regex library.
13
13
  email:
14
14
  - electric.molfar@gmail.com
15
15
  executables: []