xre2 0.1.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55ca051b3f858164be0f5e79831c1ca4b7df267d4e3597f8c128db79524b4f0a
4
- data.tar.gz: 0a2d988389268e3cac55d5970075111f74c110479bf1bfc0213b7ee989424872
3
+ metadata.gz: 6401693cee9fee608b66f07638eb684c95a0ee5d16519aca6d272920cc8bbe3a
4
+ data.tar.gz: 9c6a061de9b45aeb7acbc5404dd0b21c3f0f394a3f54632e993a335b6a6da5de
5
5
  SHA512:
6
- metadata.gz: 0fc6f00f3d0283128e09b16bb61210f8de3e9f74f4605b730a36d2790f8a78a6adb2e461f089a585324663b03b98545bcffedfdf59f872f285a423f1cf9d08c0
7
- data.tar.gz: ed746fbdf6a0da543a08a5b8259e34541d6feb92f46605a509a931b2419d0d6cefe70fff0cdea2163d4c0e1e153fa2833d3fa6e772160ce0e9d7306093459648
6
+ metadata.gz: 8feee07ba3c54a1ffda829a7b4903dd9ef9dc909dd8ba87b1c5cfffa3ee06a75ae712ae71da69ade4cb14f193e7d8f9f78a8d0696cb3e72bef3e583b07c85677
7
+ data.tar.gz: 757d878581016a0f3299b2d4c677e4c4db129ff864469fba7f24b91b147066c461e1caa1c607e8981487768e0cf538cbe63618108c9d77fc21defca8109d22f1
data/Cargo.lock CHANGED
@@ -106,10 +106,6 @@ name = "cc"
106
106
  version = "1.0.90"
107
107
  source = "registry+https://github.com/rust-lang/crates.io-index"
108
108
  checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
109
- dependencies = [
110
- "jobserver",
111
- "libc",
112
- ]
113
109
 
114
110
  [[package]]
115
111
  name = "cexpr"
@@ -231,15 +227,6 @@ dependencies = [
231
227
  "either",
232
228
  ]
233
229
 
234
- [[package]]
235
- name = "jobserver"
236
- version = "0.1.28"
237
- source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
239
- dependencies = [
240
- "libc",
241
- ]
242
-
243
230
  [[package]]
244
231
  name = "lazy_static"
245
232
  version = "1.4.0"
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
282
269
 
283
270
  [[package]]
284
271
  name = "magnus"
285
- version = "0.6.2"
272
+ version = "0.6.3"
286
273
  source = "registry+https://github.com/rust-lang/crates.io-index"
287
- checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
274
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
288
275
  dependencies = [
289
276
  "magnus-macros",
290
277
  "rb-sys",
@@ -354,28 +341,6 @@ dependencies = [
354
341
  "pkg-config",
355
342
  ]
356
343
 
357
- [[package]]
358
- name = "pcre2"
359
- version = "0.2.7"
360
- source = "registry+https://github.com/rust-lang/crates.io-index"
361
- checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
362
- dependencies = [
363
- "libc",
364
- "log",
365
- "pcre2-sys",
366
- ]
367
-
368
- [[package]]
369
- name = "pcre2-sys"
370
- version = "0.2.9"
371
- source = "registry+https://github.com/rust-lang/crates.io-index"
372
- checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
373
- dependencies = [
374
- "cc",
375
- "libc",
376
- "pkg-config",
377
- ]
378
-
379
344
  [[package]]
380
345
  name = "peeking_take_while"
381
346
  version = "0.1.2"
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
510
475
 
511
476
  [[package]]
512
477
  name = "syn"
513
- version = "2.0.55"
478
+ version = "2.0.57"
514
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
515
- checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
480
+ checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
516
481
  dependencies = [
517
482
  "proc-macro2",
518
483
  "quote",
@@ -543,6 +508,12 @@ version = "1.0.12"
543
508
  source = "registry+https://github.com/rust-lang/crates.io-index"
544
509
  checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
545
510
 
511
+ [[package]]
512
+ name = "unicode-segmentation"
513
+ version = "1.11.0"
514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
515
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
516
+
546
517
  [[package]]
547
518
  name = "unicode-width"
548
519
  version = "0.1.11"
@@ -671,5 +642,6 @@ dependencies = [
671
642
  "fancy-regex",
672
643
  "magnus",
673
644
  "onig",
674
- "pcre2",
645
+ "regex",
646
+ "unicode-segmentation",
675
647
  ]
data/README.md CHANGED
@@ -1,24 +1,20 @@
1
1
  # Xre2
2
2
 
3
- TODO: Delete this and the text below, and describe your gem
4
-
5
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/xre2`. To experiment with that code, run `bin/console` for an interactive prompt.
3
+ some rust regexes for ruby
6
4
 
7
5
  ## Installation
8
6
 
9
- TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
10
-
11
7
  Install the gem and add to the application's Gemfile by executing:
12
8
 
13
- $ bundle add UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
9
+ $ bundle add xre2
14
10
 
15
11
  If bundler is not being used to manage dependencies, install the gem by executing:
16
12
 
17
- $ gem install UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
13
+ $ gem install xre2
18
14
 
19
15
  ## Usage
20
16
 
21
- TODO: Write usage instructions here
17
+ todo
22
18
 
23
19
  ## Development
24
20
 
@@ -28,7 +24,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
28
24
 
29
25
  ## Contributing
30
26
 
31
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/xre2. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/xre2/blob/main/CODE_OF_CONDUCT.md).
27
+ Bug reports and pull requests are welcome on GitHub at https://github.com/vagab/xre2. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/vagab/xre2/blob/main/CODE_OF_CONDUCT.md).
32
28
 
33
29
  ## License
34
30
 
@@ -36,4 +32,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
36
32
 
37
33
  ## Code of Conduct
38
34
 
39
- Everyone interacting in the Xre2 project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/xre2/blob/main/CODE_OF_CONDUCT.md).
35
+ Everyone interacting in the Xre2 project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/vagab/xre2/blob/main/CODE_OF_CONDUCT.md).
data/ext/xre2/Cargo.toml CHANGED
@@ -11,6 +11,7 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  fancy-regex = "0.13.0"
14
- magnus = { version = "0.6.2" }
14
+ magnus = "0.6.2"
15
15
  onig = "6.4.0"
16
- pcre2 = "0.2.7"
16
+ regex = "1.10.4"
17
+ unicode-segmentation = "1.11.0"
data/ext/xre2/src/lib.rs CHANGED
@@ -1,34 +1,196 @@
1
1
  use magnus::{function, prelude::*, Error, Ruby};
2
- use pcre2::bytes::*;
2
+ use std::iter::Enumerate;
3
+ use std::ops::Range;
4
+ use std::str::CharIndices;
5
+ use unicode_segmentation::UnicodeSegmentation;
6
+
7
+ #[magnus::wrap(class = "Regexx")]
8
+ struct Regex {
9
+ regex: regex::Regex,
10
+ }
11
+
12
+ #[magnus::wrap(class = "RegexxList")]
13
+ struct RegexList {
14
+ regexes: Vec<regex::Regex>,
15
+ }
16
+
17
+ impl RegexList {
18
+ fn new(regexes: Vec<String>) -> Self {
19
+ Self {
20
+ regexes: regexes.iter().map(|r| regex(r.to_string()).regex).collect(),
21
+ }
22
+ }
23
+ }
24
+
25
+ fn regex(target: String) -> Regex {
26
+ let regex = regex::RegexBuilder::new(&target)
27
+ .multi_line(true)
28
+ .build()
29
+ .unwrap();
30
+
31
+ Regex { regex }
32
+ }
33
+
34
+ fn reg_array(target: &RegexList, subject: String) -> Vec<(String, usize)> {
35
+ target
36
+ .regexes
37
+ .iter()
38
+ .map(|t| reg_compiled_array(t, &subject))
39
+ .flatten()
40
+ .collect()
41
+ }
42
+
43
+ fn reg_array_with_context(target: &RegexList, subject: String) -> Vec<(String, usize, String)> {
44
+ target
45
+ .regexes
46
+ .iter()
47
+ .map(|t| reg_compiled_array_with_context(t, &subject))
48
+ .flatten()
49
+ .collect()
50
+ }
51
+
52
+ fn reg_compiled_array(target: &regex::Regex, subject: &str) -> Vec<(String, usize)> {
53
+ let mut chars_iterator = subject.char_indices().enumerate();
54
+
55
+ target
56
+ .captures_iter(subject)
57
+ .map(|m| m.get(1).unwrap())
58
+ .map(|m| {
59
+ (
60
+ m.as_str().trim().to_string(),
61
+ find_char_index(&mut chars_iterator, m.start()),
62
+ )
63
+ })
64
+ .collect()
65
+ }
66
+
67
+ fn reg_compiled_array_with_context(
68
+ target: &regex::Regex,
69
+ subject: &str,
70
+ ) -> Vec<(String, usize, String)> {
71
+ let mut chars_iterator = subject.char_indices().enumerate();
72
+
73
+ target
74
+ .captures_iter(subject)
75
+ .map(|m| m.get(1).unwrap())
76
+ .map(|m| {
77
+ (
78
+ m.as_str().trim().to_string(),
79
+ find_char_index(&mut chars_iterator, m.start()),
80
+ extract_from_str(&subject, m.start()..m.end(), 10),
81
+ )
82
+ })
83
+ .collect()
84
+ }
85
+
86
+ fn reg_compiled(target: &Regex, subject: String) -> Vec<(String, usize)> {
87
+ let mut chars_iterator = subject.char_indices().enumerate();
88
+
89
+ target
90
+ .regex
91
+ .captures_iter(&subject)
92
+ .map(|m| m.get(1).unwrap())
93
+ .map(|m| {
94
+ (
95
+ m.as_str().trim().to_string(),
96
+ find_char_index(&mut chars_iterator, m.start()),
97
+ )
98
+ })
99
+ .collect()
100
+ }
101
+
102
+ fn reg_compiled_with_context(target: &Regex, subject: String) -> Vec<(String, usize, String)> {
103
+ let mut chars_iterator = subject.char_indices().enumerate();
104
+
105
+ target
106
+ .regex
107
+ .captures_iter(&subject)
108
+ .map(|m| m.get(1).unwrap())
109
+ .map(|m| {
110
+ (
111
+ m.as_str().trim().to_string(),
112
+ find_char_index(&mut chars_iterator, m.start()),
113
+ extract_from_str(&subject, m.start()..m.end(), 10),
114
+ )
115
+ })
116
+ .collect()
117
+ }
118
+
119
+ fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
120
+ iterator
121
+ .find_map(|(idx, (byte_index, _))| (byte_index >= byte_start).then_some(idx))
122
+ .unwrap_or(0)
123
+ }
124
+
125
+ fn extract_from_vec(vec: &Vec<char>, range: Range<usize>, radius: usize) -> String {
126
+ vec[range.start.saturating_sub(radius)..range.end.saturating_add(radius).min(vec.len() - 1)]
127
+ .iter()
128
+ .collect()
129
+ }
130
+
131
+ fn extract_from_str(subject: &str, range: Range<usize>, radius: usize) -> String {
132
+ let l_text = subject[..range.start]
133
+ .graphemes(true)
134
+ .rev()
135
+ .take(radius)
136
+ .collect::<String>()
137
+ .chars()
138
+ .rev()
139
+ .collect::<String>();
140
+ let r_text = subject[range.end..]
141
+ .graphemes(true)
142
+ .skip(1)
143
+ .take(radius)
144
+ .collect::<String>();
145
+ format!("{}{}{}", l_text, &subject[range], r_text)
146
+ }
147
+
148
+ fn onig(target: String, subject: String) -> Vec<(String, usize)> {
149
+ let mut chars_iterator = subject.char_indices().enumerate();
3
150
 
4
- fn onig(target: String, subject: String) -> Vec<(usize, usize)> {
5
151
  onig::Regex::new(&target)
6
152
  .unwrap()
7
153
  .find_iter(&subject)
154
+ .map(|m| {
155
+ (
156
+ subject[m.0..m.1].to_string(),
157
+ find_char_index(&mut chars_iterator, m.0),
158
+ )
159
+ })
8
160
  .collect()
9
161
  }
10
162
 
11
- fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
12
- fancy_regex::RegexBuilder::new(&target)
13
- .backtrack_limit(10_000_000)
163
+ fn reg(target: String, subject: String) -> Vec<(String, usize)> {
164
+ let mut chars_iterator = subject.char_indices().enumerate();
165
+
166
+ regex::RegexBuilder::new(&target)
167
+ // .multi_line(true)
14
168
  .build()
15
169
  .unwrap()
16
- .find_iter(&subject)
17
- .map(|m| m.unwrap())
18
- .map(|m| (m.as_str().to_string(), m.start() + 1))
170
+ .captures_iter(&subject)
171
+ .map(|m| m.get(1).unwrap())
172
+ .map(|m| {
173
+ (
174
+ m.as_str().trim().to_string(),
175
+ find_char_index(&mut chars_iterator, m.start()),
176
+ )
177
+ })
19
178
  .collect()
20
179
  }
21
180
 
22
- fn pcre2(target: String, subject: String) -> Vec<(String, usize)> {
23
- RegexBuilder::new()
24
- .build(&target)
181
+ fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
182
+ let mut chars_iterator = subject.char_indices().enumerate();
183
+
184
+ fancy_regex::RegexBuilder::new(&target)
185
+ .backtrack_limit(100_000_000)
186
+ .build()
25
187
  .unwrap()
26
- .find_iter(subject.as_bytes())
27
- .map(|m| m.unwrap())
188
+ .captures_iter(&subject)
189
+ .map(|m| m.unwrap().iter().nth(1).unwrap().unwrap())
28
190
  .map(|m| {
29
191
  (
30
- std::str::from_utf8(m.as_bytes()).unwrap().to_string(),
31
- m.start() + 1,
192
+ m.as_str().trim().to_string(),
193
+ find_char_index(&mut chars_iterator, m.start()),
32
194
  )
33
195
  })
34
196
  .collect()
@@ -37,8 +199,22 @@ fn pcre2(target: String, subject: String) -> Vec<(String, usize)> {
37
199
  #[magnus::init]
38
200
  fn init(ruby: &Ruby) -> Result<(), Error> {
39
201
  let module = ruby.define_module("Xre2")?;
202
+ ruby.define_class("Regexx", ruby.class_object())?;
203
+ let regexxlist = ruby.define_class("RegexxList", ruby.class_object())?;
204
+ regexxlist.define_singleton_method("new", function!(RegexList::new, 1))?;
205
+ module.define_singleton_method("regex", function!(regex, 1))?;
40
206
  module.define_singleton_method("fancy", function!(fancy, 2))?;
41
207
  module.define_singleton_method("onig", function!(onig, 2))?;
42
- module.define_singleton_method("pcre2", function!(pcre2, 2))?;
208
+ module.define_singleton_method("reg", function!(reg, 2))?;
209
+ module.define_singleton_method("reg_array", function!(reg_array, 2))?;
210
+ module.define_singleton_method("reg_compiled", function!(reg_compiled, 2))?;
211
+ module.define_singleton_method(
212
+ "reg_compiled_with_context",
213
+ function!(reg_compiled_with_context, 2),
214
+ )?;
215
+ module.define_singleton_method(
216
+ "reg_array_with_context",
217
+ function!(reg_array_with_context, 2),
218
+ )?;
43
219
  Ok(())
44
220
  }
data/lib/xre2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xre2
4
- VERSION = "0.1.0"
4
+ VERSION = "0.4.0"
5
5
  end