xre2 0.2.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 919152e7fb958450194d682d69c6adc9f2ad576ff3f832e1fb913db820bd5322
4
- data.tar.gz: 58ddf421c42e2d3b243f98904bae4f12c3c762bbdb942436d95c82494cb09b62
3
+ metadata.gz: 6401693cee9fee608b66f07638eb684c95a0ee5d16519aca6d272920cc8bbe3a
4
+ data.tar.gz: 9c6a061de9b45aeb7acbc5404dd0b21c3f0f394a3f54632e993a335b6a6da5de
5
5
  SHA512:
6
- metadata.gz: 94b5efe9cd317b393e7d674da04a27a54931c51cbb27af8d8371f61a6db67935c362e0152a31e37e014575f95b09041c3a02dfea914906672a7a23dfdba4f9cf
7
- data.tar.gz: 53fa690dbf0872e34fa190edfb03331f847a8e2b70769f7c6a3eeb7ae9b12f93efb1675e86af020579f5be90687c4844d2b753499e01253402429a83f676370b
6
+ metadata.gz: 8feee07ba3c54a1ffda829a7b4903dd9ef9dc909dd8ba87b1c5cfffa3ee06a75ae712ae71da69ade4cb14f193e7d8f9f78a8d0696cb3e72bef3e583b07c85677
7
+ data.tar.gz: 757d878581016a0f3299b2d4c677e4c4db129ff864469fba7f24b91b147066c461e1caa1c607e8981487768e0cf538cbe63618108c9d77fc21defca8109d22f1
data/Cargo.lock CHANGED
@@ -106,10 +106,6 @@ name = "cc"
106
106
  version = "1.0.90"
107
107
  source = "registry+https://github.com/rust-lang/crates.io-index"
108
108
  checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
109
- dependencies = [
110
- "jobserver",
111
- "libc",
112
- ]
113
109
 
114
110
  [[package]]
115
111
  name = "cexpr"
@@ -231,15 +227,6 @@ dependencies = [
231
227
  "either",
232
228
  ]
233
229
 
234
- [[package]]
235
- name = "jobserver"
236
- version = "0.1.28"
237
- source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
239
- dependencies = [
240
- "libc",
241
- ]
242
-
243
230
  [[package]]
244
231
  name = "lazy_static"
245
232
  version = "1.4.0"
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
282
269
 
283
270
  [[package]]
284
271
  name = "magnus"
285
- version = "0.6.2"
272
+ version = "0.6.3"
286
273
  source = "registry+https://github.com/rust-lang/crates.io-index"
287
- checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
274
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
288
275
  dependencies = [
289
276
  "magnus-macros",
290
277
  "rb-sys",
@@ -354,28 +341,6 @@ dependencies = [
354
341
  "pkg-config",
355
342
  ]
356
343
 
357
- [[package]]
358
- name = "pcre2"
359
- version = "0.2.7"
360
- source = "registry+https://github.com/rust-lang/crates.io-index"
361
- checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
362
- dependencies = [
363
- "libc",
364
- "log",
365
- "pcre2-sys",
366
- ]
367
-
368
- [[package]]
369
- name = "pcre2-sys"
370
- version = "0.2.9"
371
- source = "registry+https://github.com/rust-lang/crates.io-index"
372
- checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
373
- dependencies = [
374
- "cc",
375
- "libc",
376
- "pkg-config",
377
- ]
378
-
379
344
  [[package]]
380
345
  name = "peeking_take_while"
381
346
  version = "0.1.2"
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
510
475
 
511
476
  [[package]]
512
477
  name = "syn"
513
- version = "2.0.55"
478
+ version = "2.0.57"
514
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
515
- checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
480
+ checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
516
481
  dependencies = [
517
482
  "proc-macro2",
518
483
  "quote",
@@ -543,6 +508,12 @@ version = "1.0.12"
543
508
  source = "registry+https://github.com/rust-lang/crates.io-index"
544
509
  checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
545
510
 
511
+ [[package]]
512
+ name = "unicode-segmentation"
513
+ version = "1.11.0"
514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
515
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
516
+
546
517
  [[package]]
547
518
  name = "unicode-width"
548
519
  version = "0.1.11"
@@ -671,5 +642,6 @@ dependencies = [
671
642
  "fancy-regex",
672
643
  "magnus",
673
644
  "onig",
674
- "pcre2",
645
+ "regex",
646
+ "unicode-segmentation",
675
647
  ]
data/ext/xre2/Cargo.toml CHANGED
@@ -11,6 +11,7 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  fancy-regex = "0.13.0"
14
- magnus = { version = "0.6.2" }
14
+ magnus = "0.6.2"
15
15
  onig = "6.4.0"
16
- pcre2 = "0.2.7"
16
+ regex = "1.10.4"
17
+ unicode-segmentation = "1.11.0"
data/ext/xre2/src/lib.rs CHANGED
@@ -1,7 +1,152 @@
1
1
  use magnus::{function, prelude::*, Error, Ruby};
2
+ use std::iter::Enumerate;
3
+ use std::ops::Range;
4
+ use std::str::CharIndices;
5
+ use unicode_segmentation::UnicodeSegmentation;
6
+
7
+ #[magnus::wrap(class = "Regexx")]
8
+ struct Regex {
9
+ regex: regex::Regex,
10
+ }
11
+
12
+ #[magnus::wrap(class = "RegexxList")]
13
+ struct RegexList {
14
+ regexes: Vec<regex::Regex>,
15
+ }
16
+
17
+ impl RegexList {
18
+ fn new(regexes: Vec<String>) -> Self {
19
+ Self {
20
+ regexes: regexes.iter().map(|r| regex(r.to_string()).regex).collect(),
21
+ }
22
+ }
23
+ }
24
+
25
+ fn regex(target: String) -> Regex {
26
+ let regex = regex::RegexBuilder::new(&target)
27
+ .multi_line(true)
28
+ .build()
29
+ .unwrap();
30
+
31
+ Regex { regex }
32
+ }
33
+
34
+ fn reg_array(target: &RegexList, subject: String) -> Vec<(String, usize)> {
35
+ target
36
+ .regexes
37
+ .iter()
38
+ .map(|t| reg_compiled_array(t, &subject))
39
+ .flatten()
40
+ .collect()
41
+ }
42
+
43
+ fn reg_array_with_context(target: &RegexList, subject: String) -> Vec<(String, usize, String)> {
44
+ target
45
+ .regexes
46
+ .iter()
47
+ .map(|t| reg_compiled_array_with_context(t, &subject))
48
+ .flatten()
49
+ .collect()
50
+ }
51
+
52
+ fn reg_compiled_array(target: &regex::Regex, subject: &str) -> Vec<(String, usize)> {
53
+ let mut chars_iterator = subject.char_indices().enumerate();
54
+
55
+ target
56
+ .captures_iter(subject)
57
+ .map(|m| m.get(1).unwrap())
58
+ .map(|m| {
59
+ (
60
+ m.as_str().trim().to_string(),
61
+ find_char_index(&mut chars_iterator, m.start()),
62
+ )
63
+ })
64
+ .collect()
65
+ }
66
+
67
+ fn reg_compiled_array_with_context(
68
+ target: &regex::Regex,
69
+ subject: &str,
70
+ ) -> Vec<(String, usize, String)> {
71
+ let mut chars_iterator = subject.char_indices().enumerate();
72
+
73
+ target
74
+ .captures_iter(subject)
75
+ .map(|m| m.get(1).unwrap())
76
+ .map(|m| {
77
+ (
78
+ m.as_str().trim().to_string(),
79
+ find_char_index(&mut chars_iterator, m.start()),
80
+ extract_from_str(&subject, m.start()..m.end(), 10),
81
+ )
82
+ })
83
+ .collect()
84
+ }
85
+
86
+ fn reg_compiled(target: &Regex, subject: String) -> Vec<(String, usize)> {
87
+ let mut chars_iterator = subject.char_indices().enumerate();
88
+
89
+ target
90
+ .regex
91
+ .captures_iter(&subject)
92
+ .map(|m| m.get(1).unwrap())
93
+ .map(|m| {
94
+ (
95
+ m.as_str().trim().to_string(),
96
+ find_char_index(&mut chars_iterator, m.start()),
97
+ )
98
+ })
99
+ .collect()
100
+ }
101
+
102
+ fn reg_compiled_with_context(target: &Regex, subject: String) -> Vec<(String, usize, String)> {
103
+ let mut chars_iterator = subject.char_indices().enumerate();
104
+
105
+ target
106
+ .regex
107
+ .captures_iter(&subject)
108
+ .map(|m| m.get(1).unwrap())
109
+ .map(|m| {
110
+ (
111
+ m.as_str().trim().to_string(),
112
+ find_char_index(&mut chars_iterator, m.start()),
113
+ extract_from_str(&subject, m.start()..m.end(), 10),
114
+ )
115
+ })
116
+ .collect()
117
+ }
118
+
119
+ fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
120
+ iterator
121
+ .find_map(|(idx, (byte_index, _))| (byte_index >= byte_start).then_some(idx))
122
+ .unwrap_or(0)
123
+ }
124
+
125
+ fn extract_from_vec(vec: &Vec<char>, range: Range<usize>, radius: usize) -> String {
126
+ vec[range.start.saturating_sub(radius)..range.end.saturating_add(radius).min(vec.len() - 1)]
127
+ .iter()
128
+ .collect()
129
+ }
130
+
131
+ fn extract_from_str(subject: &str, range: Range<usize>, radius: usize) -> String {
132
+ let l_text = subject[..range.start]
133
+ .graphemes(true)
134
+ .rev()
135
+ .take(radius)
136
+ .collect::<String>()
137
+ .chars()
138
+ .rev()
139
+ .collect::<String>();
140
+ let r_text = subject[range.end..]
141
+ .graphemes(true)
142
+ .skip(1)
143
+ .take(radius)
144
+ .collect::<String>();
145
+ format!("{}{}{}", l_text, &subject[range], r_text)
146
+ }
2
147
 
3
148
  fn onig(target: String, subject: String) -> Vec<(String, usize)> {
4
- let mut chars_start_iterator = subject.char_indices().enumerate();
149
+ let mut chars_iterator = subject.char_indices().enumerate();
5
150
 
6
151
  onig::Regex::new(&target)
7
152
  .unwrap()
@@ -9,21 +154,21 @@ fn onig(target: String, subject: String) -> Vec<(String, usize)> {
9
154
  .map(|m| {
10
155
  (
11
156
  subject[m.0..m.1].to_string(),
12
- find_char_index(&mut chars_start_iterator, m.0),
157
+ find_char_index(&mut chars_iterator, m.0),
13
158
  )
14
159
  })
15
160
  .collect()
16
161
  }
17
162
 
18
- fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
163
+ fn reg(target: String, subject: String) -> Vec<(String, usize)> {
19
164
  let mut chars_iterator = subject.char_indices().enumerate();
20
165
 
21
- fancy_regex::RegexBuilder::new(&target)
22
- .backtrack_limit(100_000_000)
166
+ regex::RegexBuilder::new(&target)
167
+ // .multi_line(true)
23
168
  .build()
24
169
  .unwrap()
25
- .find_iter(&subject)
26
- .map(|m| m.unwrap())
170
+ .captures_iter(&subject)
171
+ .map(|m| m.get(1).unwrap())
27
172
  .map(|m| {
28
173
  (
29
174
  m.as_str().trim().to_string(),
@@ -33,7 +178,7 @@ fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
33
178
  .collect()
34
179
  }
35
180
 
36
- fn fancy_captures(target: String, subject: String) -> Vec<(String, usize)> {
181
+ fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
37
182
  let mut chars_iterator = subject.char_indices().enumerate();
38
183
 
39
184
  fancy_regex::RegexBuilder::new(&target)
@@ -51,20 +196,25 @@ fn fancy_captures(target: String, subject: String) -> Vec<(String, usize)> {
51
196
  .collect()
52
197
  }
53
198
 
54
- fn find_char_index(
55
- iterator: &mut std::iter::Enumerate<std::str::CharIndices>,
56
- byte_start: usize,
57
- ) -> usize {
58
- iterator
59
- .find_map(|(idx, (byte_index, _))| (byte_index >= byte_start).then_some(idx))
60
- .unwrap_or(0)
61
- }
62
-
63
199
  #[magnus::init]
64
200
  fn init(ruby: &Ruby) -> Result<(), Error> {
65
201
  let module = ruby.define_module("Xre2")?;
202
+ ruby.define_class("Regexx", ruby.class_object())?;
203
+ let regexxlist = ruby.define_class("RegexxList", ruby.class_object())?;
204
+ regexxlist.define_singleton_method("new", function!(RegexList::new, 1))?;
205
+ module.define_singleton_method("regex", function!(regex, 1))?;
66
206
  module.define_singleton_method("fancy", function!(fancy, 2))?;
67
- module.define_singleton_method("fancy_captures", function!(fancy_captures, 2))?;
68
207
  module.define_singleton_method("onig", function!(onig, 2))?;
208
+ module.define_singleton_method("reg", function!(reg, 2))?;
209
+ module.define_singleton_method("reg_array", function!(reg_array, 2))?;
210
+ module.define_singleton_method("reg_compiled", function!(reg_compiled, 2))?;
211
+ module.define_singleton_method(
212
+ "reg_compiled_with_context",
213
+ function!(reg_compiled_with_context, 2),
214
+ )?;
215
+ module.define_singleton_method(
216
+ "reg_array_with_context",
217
+ function!(reg_array_with_context, 2),
218
+ )?;
69
219
  Ok(())
70
220
  }
data/lib/xre2/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Xre2
4
- VERSION = "0.2.0"
4
+ VERSION = "0.4.0"
5
5
  end
@@ -106,10 +106,6 @@ name = "cc"
106
106
  version = "1.0.90"
107
107
  source = "registry+https://github.com/rust-lang/crates.io-index"
108
108
  checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
109
- dependencies = [
110
- "jobserver",
111
- "libc",
112
- ]
113
109
 
114
110
  [[package]]
115
111
  name = "cexpr"
@@ -231,15 +227,6 @@ dependencies = [
231
227
  "either",
232
228
  ]
233
229
 
234
- [[package]]
235
- name = "jobserver"
236
- version = "0.1.28"
237
- source = "registry+https://github.com/rust-lang/crates.io-index"
238
- checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
239
- dependencies = [
240
- "libc",
241
- ]
242
-
243
230
  [[package]]
244
231
  name = "lazy_static"
245
232
  version = "1.4.0"
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
282
269
 
283
270
  [[package]]
284
271
  name = "magnus"
285
- version = "0.6.2"
272
+ version = "0.6.3"
286
273
  source = "registry+https://github.com/rust-lang/crates.io-index"
287
- checksum = "4778544796676e8428e9c622460ebf284bea52d8b10db3aeb449d8b5e61b3a13"
274
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
288
275
  dependencies = [
289
276
  "magnus-macros",
290
277
  "rb-sys",
@@ -354,28 +341,6 @@ dependencies = [
354
341
  "pkg-config",
355
342
  ]
356
343
 
357
- [[package]]
358
- name = "pcre2"
359
- version = "0.2.7"
360
- source = "registry+https://github.com/rust-lang/crates.io-index"
361
- checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
362
- dependencies = [
363
- "libc",
364
- "log",
365
- "pcre2-sys",
366
- ]
367
-
368
- [[package]]
369
- name = "pcre2-sys"
370
- version = "0.2.9"
371
- source = "registry+https://github.com/rust-lang/crates.io-index"
372
- checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
373
- dependencies = [
374
- "cc",
375
- "libc",
376
- "pkg-config",
377
- ]
378
-
379
344
  [[package]]
380
345
  name = "peeking_take_while"
381
346
  version = "0.1.2"
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
510
475
 
511
476
  [[package]]
512
477
  name = "syn"
513
- version = "2.0.55"
478
+ version = "2.0.57"
514
479
  source = "registry+https://github.com/rust-lang/crates.io-index"
515
- checksum = "002a1b3dbf967edfafc32655d0f377ab0bb7b994aa1d32c8cc7e9b8bf3ebb8f0"
480
+ checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
516
481
  dependencies = [
517
482
  "proc-macro2",
518
483
  "quote",
@@ -543,6 +508,12 @@ version = "1.0.12"
543
508
  source = "registry+https://github.com/rust-lang/crates.io-index"
544
509
  checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
545
510
 
511
+ [[package]]
512
+ name = "unicode-segmentation"
513
+ version = "1.11.0"
514
+ source = "registry+https://github.com/rust-lang/crates.io-index"
515
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
516
+
546
517
  [[package]]
547
518
  name = "unicode-width"
548
519
  version = "0.1.11"
@@ -671,5 +642,6 @@ dependencies = [
671
642
  "fancy-regex",
672
643
  "magnus",
673
644
  "onig",
674
- "pcre2",
645
+ "regex",
646
+ "unicode-segmentation",
675
647
  ]
@@ -11,6 +11,7 @@ crate-type = ["cdylib"]
11
11
 
12
12
  [dependencies]
13
13
  fancy-regex = "0.13.0"
14
- magnus = { version = "0.6.2" }
14
+ magnus = "0.6.2"
15
15
  onig = "6.4.0"
16
- pcre2 = "0.2.7"
16
+ regex = "1.10.4"
17
+ unicode-segmentation = "1.11.0"