xre2 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 919152e7fb958450194d682d69c6adc9f2ad576ff3f832e1fb913db820bd5322
4
- data.tar.gz: 58ddf421c42e2d3b243f98904bae4f12c3c762bbdb942436d95c82494cb09b62
3
+ metadata.gz: 6401693cee9fee608b66f07638eb684c95a0ee5d16519aca6d272920cc8bbe3a
4
+ data.tar.gz: 9c6a061de9b45aeb7acbc5404dd0b21c3f0f394a3f54632e993a335b6a6da5de
5
5
  SHA512:
6
- metadata.gz: 94b5efe9cd317b393e7d674da04a27a54931c51cbb27af8d8371f61a6db67935c362e0152a31e37e014575f95b09041c3a02dfea914906672a7a23dfdba4f9cf
7
- data.tar.gz: 53fa690dbf0872e34fa190edfb03331f847a8e2b70769f7c6a3eeb7ae9b12f93efb1675e86af020579f5be90687c4844d2b753499e01253402429a83f676370b
6
+ metadata.gz: 8feee07ba3c54a1ffda829a7b4903dd9ef9dc909dd8ba87b1c5cfffa3ee06a75ae712ae71da69ade4cb14f193e7d8f9f78a8d0696cb3e72bef3e583b07c85677
7
+ data.tar.gz: 757d878581016a0f3299b2d4c677e4c4db129ff864469fba7f24b91b147066c461e1caa1c607e8981487768e0cf538cbe63618108c9d77fc21defca8109d22f1
data/Cargo.lock CHANGED
@@ -106,10 +106,6 @@ name = "cc"
106
106
  version = "1.0.90"
107
107
  source = "registry+https://github.com/rust-lang/crates.io-index"
108
108
  checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
109
- dependencies = [
110
- "jobserver",
111
- "libc",
112
- ]
113
109
 
114
110
  [[package]]
115
111
  name = "cexpr"
@@ -231,15 +227,6 @@ dependencies = [
231
227
  "either",
232
228
  ]
233
229
 
234
- [[package]]
235
- name = "jobserver"
236
- version = "0.1.28"
237
- source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
239
- dependencies = [
240
- "libc",
241
- ]
242
-
243
230
  [[package]]
244
231
  name = "lazy_static"
245
232
  version = "1.4.0"
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
282
269
 
283
270
  [[package]]
284
271
  name = "magnus"
285
- version = "0.6.2"
272
+ version = "0.6.3"
286
273
  source = "registry+https://github.com/rust-lang/crates.io-index"
287
- checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
274
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
288
275
  dependencies = [
289
276
  "magnus-macros",
290
277
  "rb-sys",
@@ -354,28 +341,6 @@ dependencies = [
354
341
  "pkg-config",
355
342
  ]
356
343
 
357
- [[package]]
358
- name = "pcre2"
359
- version = "0.2.7"
360
- source = "registry+https://github.com/rust-lang/crates.io-index"
361
- checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
362
- dependencies = [
363
- "libc",
364
- "log",
365
- "pcre2-sys",
366
- ]
367
-
368
- [[package]]
369
- name = "pcre2-sys"
370
- version = "0.2.9"
371
- source = "registry+https://github.com/rust-lang/crates.io-index"
372
- checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
373
- dependencies = [
374
- "cc",
375
- "libc",
376
- "pkg-config",
377
- ]
378
-
379
344
  [[package]]
380
345
  name = "peeking_take_while"
381
346
  version = "0.1.2"
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
510
475
 
511
476
  [[package]]
512
477
  name = "syn"
513
- version = "2.0.55"
478
+ version = "2.0.57"
514
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
515
- checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
480
+ checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
516
481
  dependencies = [
517
482
  "proc-macro2",
518
483
  "quote",
@@ -543,6 +508,12 @@ version = "1.0.12"
543
508
  source = "registry+https://github.com/rust-lang/crates.io-index"
544
509
  checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
545
510
 
511
+ [[package]]
512
+ name = "unicode-segmentation"
513
+ version = "1.11.0"
514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
515
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
516
+
546
517
  [[package]]
547
518
  name = "unicode-width"
548
519
  version = "0.1.11"
@@ -671,5 +642,6 @@ dependencies = [
671
642
  "fancy-regex",
672
643
  "magnus",
673
644
  "onig",
674
- "pcre2",
645
+ "regex",
646
+ "unicode-segmentation",
675
647
  ]
data/ext/xre2/Cargo.toml CHANGED
@@ -11,6 +11,7 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  fancy-regex = "0.13.0"
14
- magnus = { version = "0.6.2" }
14
+ magnus = "0.6.2"
15
15
  onig = "6.4.0"
16
- pcre2 = "0.2.7"
16
+ regex = "1.10.4"
17
+ unicode-segmentation = "1.11.0"
data/ext/xre2/src/lib.rs CHANGED
@@ -1,7 +1,152 @@
1
1
  use magnus::{function, prelude::*, Error, Ruby};
2
+ use std::iter::Enumerate;
3
+ use std::ops::Range;
4
+ use std::str::CharIndices;
5
+ use unicode_segmentation::UnicodeSegmentation;
6
+
7
+ #[magnus::wrap(class = "Regexx")]
8
+ struct Regex {
9
+ regex: regex::Regex,
10
+ }
11
+
12
+ #[magnus::wrap(class = "RegexxList")]
13
+ struct RegexList {
14
+ regexes: Vec<regex::Regex>,
15
+ }
16
+
17
+ impl RegexList {
18
+ fn new(regexes: Vec<String>) -> Self {
19
+ Self {
20
+ regexes: regexes.iter().map(|r| regex(r.to_string()).regex).collect(),
21
+ }
22
+ }
23
+ }
24
+
25
+ fn regex(target: String) -> Regex {
26
+ let regex = regex::RegexBuilder::new(&target)
27
+ .multi_line(true)
28
+ .build()
29
+ .unwrap();
30
+
31
+ Regex { regex }
32
+ }
33
+
34
+ fn reg_array(target: &RegexList, subject: String) -> Vec<(String, usize)> {
35
+ target
36
+ .regexes
37
+ .iter()
38
+ .map(|t| reg_compiled_array(t, &subject))
39
+ .flatten()
40
+ .collect()
41
+ }
42
+
43
+ fn reg_array_with_context(target: &RegexList, subject: String) -> Vec<(String, usize, String)> {
44
+ target
45
+ .regexes
46
+ .iter()
47
+ .map(|t| reg_compiled_array_with_context(t, &subject))
48
+ .flatten()
49
+ .collect()
50
+ }
51
+
52
+ fn reg_compiled_array(target: &regex::Regex, subject: &str) -> Vec<(String, usize)> {
53
+ let mut chars_iterator = subject.char_indices().enumerate();
54
+
55
+ target
56
+ .captures_iter(subject)
57
+ .map(|m| m.get(1).unwrap())
58
+ .map(|m| {
59
+ (
60
+ m.as_str().trim().to_string(),
61
+ find_char_index(&mut chars_iterator, m.start()),
62
+ )
63
+ })
64
+ .collect()
65
+ }
66
+
67
+ fn reg_compiled_array_with_context(
68
+ target: &regex::Regex,
69
+ subject: &str,
70
+ ) -> Vec<(String, usize, String)> {
71
+ let mut chars_iterator = subject.char_indices().enumerate();
72
+
73
+ target
74
+ .captures_iter(subject)
75
+ .map(|m| m.get(1).unwrap())
76
+ .map(|m| {
77
+ (
78
+ m.as_str().trim().to_string(),
79
+ find_char_index(&mut chars_iterator, m.start()),
80
+ extract_from_str(&subject, m.start()..m.end(), 10),
81
+ )
82
+ })
83
+ .collect()
84
+ }
85
+
86
+ fn reg_compiled(target: &Regex, subject: String) -> Vec<(String, usize)> {
87
+ let mut chars_iterator = subject.char_indices().enumerate();
88
+
89
+ target
90
+ .regex
91
+ .captures_iter(&subject)
92
+ .map(|m| m.get(1).unwrap())
93
+ .map(|m| {
94
+ (
95
+ m.as_str().trim().to_string(),
96
+ find_char_index(&mut chars_iterator, m.start()),
97
+ )
98
+ })
99
+ .collect()
100
+ }
101
+
102
+ fn reg_compiled_with_context(target: &Regex, subject: String) -> Vec<(String, usize, String)> {
103
+ let mut chars_iterator = subject.char_indices().enumerate();
104
+
105
+ target
106
+ .regex
107
+ .captures_iter(&subject)
108
+ .map(|m| m.get(1).unwrap())
109
+ .map(|m| {
110
+ (
111
+ m.as_str().trim().to_string(),
112
+ find_char_index(&mut chars_iterator, m.start()),
113
+ extract_from_str(&subject, m.start()..m.end(), 10),
114
+ )
115
+ })
116
+ .collect()
117
+ }
118
+
119
+ fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
120
+ iterator
121
+ .find_map(|(idx, (byte_index, _))| (byte_index >= byte_start).then_some(idx))
122
+ .unwrap_or(0)
123
+ }
124
+
125
+ fn extract_from_vec(vec: &Vec<char>, range: Range<usize>, radius: usize) -> String {
126
+ vec[range.start.saturating_sub(radius)..range.end.saturating_add(radius).min(vec.len() - 1)]
127
+ .iter()
128
+ .collect()
129
+ }
130
+
131
+ fn extract_from_str(subject: &str, range: Range<usize>, radius: usize) -> String {
132
+ let l_text = subject[..range.start]
133
+ .graphemes(true)
134
+ .rev()
135
+ .take(radius)
136
+ .collect::<String>()
137
+ .chars()
138
+ .rev()
139
+ .collect::<String>();
140
+ let r_text = subject[range.end..]
141
+ .graphemes(true)
142
+ .skip(1)
143
+ .take(radius)
144
+ .collect::<String>();
145
+ format!("{}{}{}", l_text, &subject[range], r_text)
146
+ }
2
147
 
3
148
  fn onig(target: String, subject: String) -> Vec<(String, usize)> {
4
- let mut chars_start_iterator = subject.char_indices().enumerate();
149
+ let mut chars_iterator = subject.char_indices().enumerate();
5
150
 
6
151
  onig::Regex::new(&target)
7
152
  .unwrap()
@@ -9,21 +154,21 @@ fn onig(target: String, subject: String) -> Vec<(String, usize)> {
9
154
  .map(|m| {
10
155
  (
11
156
  subject[m.0..m.1].to_string(),
12
- find_char_index(&mut chars_start_iterator, m.0),
157
+ find_char_index(&mut chars_iterator, m.0),
13
158
  )
14
159
  })
15
160
  .collect()
16
161
  }
17
162
 
18
- fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
163
+ fn reg(target: String, subject: String) -> Vec<(String, usize)> {
19
164
  let mut chars_iterator = subject.char_indices().enumerate();
20
165
 
21
- fancy_regex::RegexBuilder::new(&target)
22
- .backtrack_limit(100_000_000)
166
+ regex::RegexBuilder::new(&target)
167
+ // .multi_line(true)
23
168
  .build()
24
169
  .unwrap()
25
- .find_iter(&subject)
26
- .map(|m| m.unwrap())
170
+ .captures_iter(&subject)
171
+ .map(|m| m.get(1).unwrap())
27
172
  .map(|m| {
28
173
  (
29
174
  m.as_str().trim().to_string(),
@@ -33,7 +178,7 @@ fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
33
178
  .collect()
34
179
  }
35
180
 
36
- fn fancy_captures(target: String, subject: String) -> Vec<(String, usize)> {
181
+ fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
37
182
  let mut chars_iterator = subject.char_indices().enumerate();
38
183
 
39
184
  fancy_regex::RegexBuilder::new(&target)
@@ -51,20 +196,25 @@ fn fancy_captures(target: String, subject: String) -> Vec<(String, usize)> {
51
196
  .collect()
52
197
  }
53
198
 
54
- fn find_char_index(
55
- iterator: &mut std::iter::Enumerate<std::str::CharIndices>,
56
- byte_start: usize,
57
- ) -> usize {
58
- iterator
59
- .find_map(|(idx, (byte_index, _))| (byte_index >= byte_start).then_some(idx))
60
- .unwrap_or(0)
61
- }
62
-
63
199
  #[magnus::init]
64
200
  fn init(ruby: &Ruby) -> Result<(), Error> {
65
201
  let module = ruby.define_module("Xre2")?;
202
+ ruby.define_class("Regexx", ruby.class_object())?;
203
+ let regexxlist = ruby.define_class("RegexxList", ruby.class_object())?;
204
+ regexxlist.define_singleton_method("new", function!(RegexList::new, 1))?;
205
+ module.define_singleton_method("regex", function!(regex, 1))?;
66
206
  module.define_singleton_method("fancy", function!(fancy, 2))?;
67
- module.define_singleton_method("fancy_captures", function!(fancy_captures, 2))?;
68
207
  module.define_singleton_method("onig", function!(onig, 2))?;
208
+ module.define_singleton_method("reg", function!(reg, 2))?;
209
+ module.define_singleton_method("reg_array", function!(reg_array, 2))?;
210
+ module.define_singleton_method("reg_compiled", function!(reg_compiled, 2))?;
211
+ module.define_singleton_method(
212
+ "reg_compiled_with_context",
213
+ function!(reg_compiled_with_context, 2),
214
+ )?;
215
+ module.define_singleton_method(
216
+ "reg_array_with_context",
217
+ function!(reg_array_with_context, 2),
218
+ )?;
69
219
  Ok(())
70
220
  }
data/lib/xre2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xre2
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
@@ -106,10 +106,6 @@ name = "cc"
106
106
  version = "1.0.90"
107
107
  source = "registry+https://github.com/rust-lang/crates.io-index"
108
108
  checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
109
- dependencies = [
110
- "jobserver",
111
- "libc",
112
- ]
113
109
 
114
110
  [[package]]
115
111
  name = "cexpr"
@@ -231,15 +227,6 @@ dependencies = [
231
227
  "either",
232
228
  ]
233
229
 
234
- [[package]]
235
- name = "jobserver"
236
- version = "0.1.28"
237
- source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
239
- dependencies = [
240
- "libc",
241
- ]
242
-
243
230
  [[package]]
244
231
  name = "lazy_static"
245
232
  version = "1.4.0"
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
282
269
 
283
270
  [[package]]
284
271
  name = "magnus"
285
- version = "0.6.2"
272
+ version = "0.6.3"
286
273
  source = "registry+https://github.com/rust-lang/crates.io-index"
287
- checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
274
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
288
275
  dependencies = [
289
276
  "magnus-macros",
290
277
  "rb-sys",
@@ -354,28 +341,6 @@ dependencies = [
354
341
  "pkg-config",
355
342
  ]
356
343
 
357
- [[package]]
358
- name = "pcre2"
359
- version = "0.2.7"
360
- source = "registry+https://github.com/rust-lang/crates.io-index"
361
- checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
362
- dependencies = [
363
- "libc",
364
- "log",
365
- "pcre2-sys",
366
- ]
367
-
368
- [[package]]
369
- name = "pcre2-sys"
370
- version = "0.2.9"
371
- source = "registry+https://github.com/rust-lang/crates.io-index"
372
- checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
373
- dependencies = [
374
- "cc",
375
- "libc",
376
- "pkg-config",
377
- ]
378
-
379
344
  [[package]]
380
345
  name = "peeking_take_while"
381
346
  version = "0.1.2"
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
510
475
 
511
476
  [[package]]
512
477
  name = "syn"
513
- version = "2.0.55"
478
+ version = "2.0.57"
514
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
515
- checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
480
+ checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
516
481
  dependencies = [
517
482
  "proc-macro2",
518
483
  "quote",
@@ -543,6 +508,12 @@ version = "1.0.12"
543
508
  source = "registry+https://github.com/rust-lang/crates.io-index"
544
509
  checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
545
510
 
511
+ [[package]]
512
+ name = "unicode-segmentation"
513
+ version = "1.11.0"
514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
515
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
516
+
546
517
  [[package]]
547
518
  name = "unicode-width"
548
519
  version = "0.1.11"
@@ -671,5 +642,6 @@ dependencies = [
671
642
  "fancy-regex",
672
643
  "magnus",
673
644
  "onig",
674
- "pcre2",
645
+ "regex",
646
+ "unicode-segmentation",
675
647
  ]
@@ -11,6 +11,7 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  fancy-regex = "0.13.0"
14
- magnus = { version = "0.6.2" }
14
+ magnus = "0.6.2"
15
15
  onig = "6.4.0"
16
- pcre2 = "0.2.7"
16
+ regex = "1.10.4"
17
+ unicode-segmentation = "1.11.0"