RubyGems - rust_regexp - Versions diffs - 0.1.0 → 0.1.2 - Mend

rust_regexp 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e186368d8d88a70355a0101812a8d8cd1fce54d02bbb910a316db7cf36800cd7
-  data.tar.gz: 7e08a707cd22e155ecde210a3e99f72eafe45efe5d998b3bdcafd0aab407e7f3
+  metadata.gz: 3fa462b537e9c799b5939981d347a240e31d6cf982cee50d3c63dff6f2fbc98b
+  data.tar.gz: bf19b0f1a052b8961645f216e4b2ce9ee4168f2447311752e1c7550f57f64286
 SHA512:
-  metadata.gz: 71b5dfce5fdc308ff46018f00608804429fa4a58d2d41f439a0add00f5b225669fd957149917be33beb43e1b3f4831ad618d51a9b62491e8cb5562130e762133
-  data.tar.gz: 71186a8b4fb00a89e53f4a1b736b54fdb3b0a3e3e682f04e96a10effade4704c0c602ebbbd45e150a066da574fc41f8fef7bb78f440e321d7161d486c191aa56
+  metadata.gz: 4cf864254f850c217dfb4c74a861c59748de044306a43a702c9c4fc677fa5a2afef9e9231a24e47f6bbce6ee6ae382f370463fafa8782e290e15d24728d1c932
+  data.tar.gz: 3e8f831f749e1ff403d4b72b5e49ad07ae7603a878086cd192cbe396760cb3682a02715f43cdcce49a4ef2a545255225ab8db33af91c9de17e89f23e53327c82

data/README.md CHANGED Viewed

@@ -1,6 +1,9 @@
 # RustRegexp
-Simple bindings for [rust/regex](https://docs.rs/regex/latest/regex/) library.
+[![Gem Version](https://badge.fury.io/rb/rust_regexp.svg)](https://badge.fury.io/rb/rust_regexp)
+[![Test](https://github.com/ocvit/rust_regexp/workflows/CI/badge.svg)](https://github.com/ocvit/rust_regexp/actions)
+Ruby bindings for [rust/regex](https://docs.rs/regex/latest/regex/) library.
 ## Installation
@@ -24,45 +27,51 @@ require "rust_regexp"
 ## Usage
-Regular expressions should pre-compiled before use:
+Regular expressions should be pre-compiled before use:
 ```ruby
-re = RustRegexp.new('(\w+):(\d+)')
+re = RustRegexp.new('p.t{2}ern*')
 # => #<RustRegexp:...>
 ```
 > [!TIP]
 > Note the use of *single quotes* when passing the regular expression as
-> a string to `rust_regexp` so that the backslashes aren't interpreted as escapes.
+> a string to `rust/regex` so that the backslashes aren't interpreted as escapes.
 To find a single match in the haystack:
 ```ruby
-re.match("ruby:123, rust:456")
+RustRegexp.new('\w+:\d+').match("ruby:123, rust:456")
+# => ["ruby:123"]
+RustRegexp.new('(\w+):(\d+)').match("ruby:123, rust:456")
 # => ["ruby", "123"]
 ```
 To find all matches in the haystack:
 ```ruby
-re.scan("ruby:123, rust:456")
+RustRegexp.new('\w+:\d+').scan("ruby:123, rust:456")
+# => ["ruby:123", "rust:456"]
+RustRegexp.new('(\w+):(\d+)').scan("ruby:123, rust:456")
 # => [["ruby", "123"], ["rust", "456"]]
 ```
 To check whether there is at least one match in the haystack:
 ```ruby
-re.match?("ruby:123")
+RustRegexp.new('\w+:\d+').match?("ruby:123")
 # => true
-re.match?("ruby")
+RustRegexp.new('\w+:\d+').match?("ruby")
 # => false
 ```
 Inspect original pattern:
 ```ruby
-re.pattern
+RustRegexp.new('\w+:\d+').pattern
 # => "\\w+:\\d+"
 ```
@@ -91,18 +100,40 @@ set.match("ghidefabc") # => [0, 1, 2]
 To check whether at least one pattern from the set matches the haystack:
 ```ruby
-set.match?("abc")
+RustRegexp::Set.new(["abc", "def"]).match?("abc")
 # => true
-set.match?("123")
+RustRegexp::Set.new(["abc", "def"]).match?("123")
 # => false
 ```
 Inspect original patterns:
 ```ruby
-set.patterns
-# => ["abc", "def", "ghi", "xyz"]
+RustRegexp::Set.new(["abc", "def"]).patterns
+# => ["abc", "def"]
+```
+## Encoding
+Currently, `rust_regexp` expects the haystack to be a UTF-8 string.
+It also supports parsing of strings with invalid UTF-8 characters by default. This is achieved by using `regex::bytes` instead of plain `regex` under the hood, so any byte sequence can be matched. The output match is encoded as a UTF-8 string.
+In case Unicode awareness of the matchers should be disabled, both `RustRegexp` and `RustRegexp::Set` support the `unicode: false` option:
+```ruby
+RustRegexp.new('\w+').match('ю٤夏')
+# => ["ю٤夏"]
+RustRegexp.new('\w+', unicode: false).match('ю٤夏')
+# => []
+RustRegexp::Set.new(['\w', '\d', '\s']).match("ю٤\u2000")
+# => [0, 1, 2]
+RustRegexp::Set.new(['\w', '\d', '\s'], unicode: false).match("ю٤\u2000")
+# => []
 ```
 ## Development

data/ext/rust_regexp/src/lib.rs CHANGED Viewed

@@ -1,28 +1,29 @@
 use magnus::{
-    class,
-    define_class,
+    class, define_class,
     encoding::RbEncoding,
-    exception,
-    function,
-    method,
-    prelude::*,
-    scan_args::scan_args,
-    Value,
-    Error,
-    RString,
-    RArray,
+    exception, function, method,
+    scan_args::{get_kwargs, scan_args},
+    Error, Module, Object, RArray, RHash, RString, Value,
 };
-use regex::bytes::{Regex, RegexSet, Match};
+use regex::bytes::{Match, Regex, RegexBuilder, RegexSet, RegexSetBuilder};
 #[magnus::wrap(class = "RustRegexp", free_immediately, size)]
 pub struct RustRegexp(Regex);
 impl RustRegexp {
     pub fn new(args: &[Value]) -> Result<Self, Error> {
-        let args = scan_args::<(String,), (), (), (), (), ()>(args)?;
+        let args = scan_args::<(String,), (), (), (), RHash, ()>(args)?;
+        let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
         let pattern = args.required.0;
+        let (unicode,) = kwargs.optional;
+        let unicode = unicode.unwrap_or_else(|| true);
-        let regex = Regex::new(&pattern).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
+        let mut builder = RegexBuilder::new(&pattern);
+        let regex = builder
+            .unicode(unicode)
+            .build()
+            .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
         Ok(Self(regex))
     }
@@ -33,6 +34,7 @@ impl RustRegexp {
         let regex = &self.0;
         let haystack = unsafe { haystack.as_slice() };
+        // no capture groups defined except the default one
         if regex.captures_len() == 1 {
             // speed optimization, `.find` is faster than `.captures`
             if let Some(capture) = regex.find(haystack) {
@@ -65,17 +67,12 @@ impl RustRegexp {
         let regex = &self.0;
         let haystack = unsafe { haystack.as_slice() };
+        // no capture groups defined except the default one
         if regex.captures_len() == 1 {
             // speed optimization, `.find_iter` is faster than `.captures_iter`
             for capture in regex.find_iter(haystack) {
-                let group = RArray::with_capacity(1);
-                group
-                    .push(Self::capture_to_ruby_string(&capture))
-                    .expect("Non-frozen array");
                 result
-                    .push(group)
+                    .push(Self::capture_to_ruby_string(&capture))
                     .expect("Non-frozen array");
             }
         } else {
@@ -94,9 +91,7 @@ impl RustRegexp {
                     }
                 }
-                result
-                    .push(group)
-                    .expect("Non-frozen array");
+                result.push(group).expect("Non-frozen array");
             }
         }
@@ -117,10 +112,7 @@ impl RustRegexp {
     }
     fn capture_to_ruby_string(capture: &Match) -> RString {
-        RString::enc_new(
-            capture.as_bytes(),
-            RbEncoding::utf8()
-        )
+        RString::enc_new(capture.as_bytes(), RbEncoding::utf8())
     }
 }
@@ -129,10 +121,18 @@ pub struct RustRegexpSet(RegexSet);
 impl RustRegexpSet {
     pub fn new(args: &[Value]) -> Result<Self, Error> {
-        let args = scan_args::<(Vec<String>,), (), (), (), (), ()>(args)?;
+        let args = scan_args::<(Vec<String>,), (), (), (), RHash, ()>(args)?;
+        let kwargs = get_kwargs::<_, (), (Option<bool>,), ()>(args.keywords, &[], &["unicode"])?;
         let patterns = args.required.0;
+        let (unicode,) = kwargs.optional;
+        let unicode = unicode.unwrap_or_else(|| true);
-        let set = RegexSet::new(patterns).map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
+        let mut builder = RegexSetBuilder::new(patterns);
+        let set = builder
+            .unicode(unicode)
+            .build()
+            .map_err(|e| Error::new(exception::arg_error(), e.to_string()))?;
         Ok(Self(set))
     }
@@ -141,7 +141,7 @@ impl RustRegexpSet {
         let set = &self.0;
         let haystack = unsafe { haystack.as_slice() };
-        set.matches(haystack).into_iter().collect()
+        set.matches(haystack).iter().collect()
     }
     pub fn is_match(&self, haystack: RString) -> bool {

data/lib/rust_regexp/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class RustRegexp
-  VERSION = "0.1.0"
+  VERSION = "0.1.2"
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rust_regexp
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.1.2
 platform: ruby
 authors:
 - Dmytro Horoshko
@@ -9,7 +9,7 @@ bindir: exe
 cert_chain: []
 date: 1980-01-02 00:00:00.000000000 Z
 dependencies: []
-description: Simple bindings to rust/regex library.
+description: Simple bindings for rust/regex library.
 email:
 - electric.molfar@gmail.com
 executables: []