xre 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6f016c5f913f154b6bb94e3bd8ebe17e01c540ba11b53b58e57aa1ce3198ec20
4
+ data.tar.gz: dca10ca30f92d2530a85b9d90a7e42a36d3324bd1447499b779fcd9be8cf925e
5
+ SHA512:
6
+ metadata.gz: 4527e85bdb47b7c2425e3bf3df994c6f30f2f3ea221f6dbd87d11bab361836487fe06bb53c9103c398486028f2aa62ed28f0881e5b5b8e3ab89fd8c1a3bee8f4
7
+ data.tar.gz: 7ea058c2c310383719a1a835383c4ceb3ec3c82416089076082674613a4ddaa5606ff4f9a8e5a4295d518e60655fcd104549c6923c74b72182450693ce857116
data/Cargo.lock ADDED
@@ -0,0 +1,348 @@
1
+ # This file is automatically @generated by Cargo.
2
+ # It is not intended for manual editing.
3
+ version = 3
4
+
5
+ [[package]]
6
+ name = "aho-corasick"
7
+ version = "1.1.3"
8
+ source = "registry+https://github.com/rust-lang/crates.io-index"
9
+ checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
10
+ dependencies = [
11
+ "memchr",
12
+ ]
13
+
14
+ [[package]]
15
+ name = "bindgen"
16
+ version = "0.69.4"
17
+ source = "registry+https://github.com/rust-lang/crates.io-index"
18
+ checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
19
+ dependencies = [
20
+ "bitflags",
21
+ "cexpr",
22
+ "clang-sys",
23
+ "itertools",
24
+ "lazy_static",
25
+ "lazycell",
26
+ "proc-macro2",
27
+ "quote",
28
+ "regex",
29
+ "rustc-hash",
30
+ "shlex",
31
+ "syn",
32
+ ]
33
+
34
+ [[package]]
35
+ name = "bitflags"
36
+ version = "2.5.0"
37
+ source = "registry+https://github.com/rust-lang/crates.io-index"
38
+ checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
39
+
40
+ [[package]]
41
+ name = "cexpr"
42
+ version = "0.6.0"
43
+ source = "registry+https://github.com/rust-lang/crates.io-index"
44
+ checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
45
+ dependencies = [
46
+ "nom",
47
+ ]
48
+
49
+ [[package]]
50
+ name = "cfg-if"
51
+ version = "1.0.0"
52
+ source = "registry+https://github.com/rust-lang/crates.io-index"
53
+ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
54
+
55
+ [[package]]
56
+ name = "clang-sys"
57
+ version = "1.7.0"
58
+ source = "registry+https://github.com/rust-lang/crates.io-index"
59
+ checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
60
+ dependencies = [
61
+ "glob",
62
+ "libc",
63
+ "libloading",
64
+ ]
65
+
66
+ [[package]]
67
+ name = "either"
68
+ version = "1.10.0"
69
+ source = "registry+https://github.com/rust-lang/crates.io-index"
70
+ checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
71
+
72
+ [[package]]
73
+ name = "glob"
74
+ version = "0.3.1"
75
+ source = "registry+https://github.com/rust-lang/crates.io-index"
76
+ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
77
+
78
+ [[package]]
79
+ name = "itertools"
80
+ version = "0.12.1"
81
+ source = "registry+https://github.com/rust-lang/crates.io-index"
82
+ checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
83
+ dependencies = [
84
+ "either",
85
+ ]
86
+
87
+ [[package]]
88
+ name = "lazy_static"
89
+ version = "1.4.0"
90
+ source = "registry+https://github.com/rust-lang/crates.io-index"
91
+ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
92
+
93
+ [[package]]
94
+ name = "lazycell"
95
+ version = "1.3.0"
96
+ source = "registry+https://github.com/rust-lang/crates.io-index"
97
+ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
98
+
99
+ [[package]]
100
+ name = "libc"
101
+ version = "0.2.153"
102
+ source = "registry+https://github.com/rust-lang/crates.io-index"
103
+ checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
104
+
105
+ [[package]]
106
+ name = "libloading"
107
+ version = "0.8.3"
108
+ source = "registry+https://github.com/rust-lang/crates.io-index"
109
+ checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
110
+ dependencies = [
111
+ "cfg-if",
112
+ "windows-targets",
113
+ ]
114
+
115
+ [[package]]
116
+ name = "magnus"
117
+ version = "0.6.3"
118
+ source = "registry+https://github.com/rust-lang/crates.io-index"
119
+ checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
120
+ dependencies = [
121
+ "magnus-macros",
122
+ "rb-sys",
123
+ "rb-sys-env",
124
+ "seq-macro",
125
+ ]
126
+
127
+ [[package]]
128
+ name = "magnus-macros"
129
+ version = "0.6.0"
130
+ source = "registry+https://github.com/rust-lang/crates.io-index"
131
+ checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
132
+ dependencies = [
133
+ "proc-macro2",
134
+ "quote",
135
+ "syn",
136
+ ]
137
+
138
+ [[package]]
139
+ name = "memchr"
140
+ version = "2.7.2"
141
+ source = "registry+https://github.com/rust-lang/crates.io-index"
142
+ checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
143
+
144
+ [[package]]
145
+ name = "minimal-lexical"
146
+ version = "0.2.1"
147
+ source = "registry+https://github.com/rust-lang/crates.io-index"
148
+ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
149
+
150
+ [[package]]
151
+ name = "nom"
152
+ version = "7.1.3"
153
+ source = "registry+https://github.com/rust-lang/crates.io-index"
154
+ checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
155
+ dependencies = [
156
+ "memchr",
157
+ "minimal-lexical",
158
+ ]
159
+
160
+ [[package]]
161
+ name = "proc-macro2"
162
+ version = "1.0.79"
163
+ source = "registry+https://github.com/rust-lang/crates.io-index"
164
+ checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
165
+ dependencies = [
166
+ "unicode-ident",
167
+ ]
168
+
169
+ [[package]]
170
+ name = "quote"
171
+ version = "1.0.36"
172
+ source = "registry+https://github.com/rust-lang/crates.io-index"
173
+ checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
174
+ dependencies = [
175
+ "proc-macro2",
176
+ ]
177
+
178
+ [[package]]
179
+ name = "rb-sys"
180
+ version = "0.9.91"
181
+ source = "registry+https://github.com/rust-lang/crates.io-index"
182
+ checksum = "eb81203e271055178603e243fee397f5f4aac125bcd20036279683fb1445a899"
183
+ dependencies = [
184
+ "rb-sys-build",
185
+ ]
186
+
187
+ [[package]]
188
+ name = "rb-sys-build"
189
+ version = "0.9.91"
190
+ source = "registry+https://github.com/rust-lang/crates.io-index"
191
+ checksum = "9de9403a6aac834e7c9534575cb14188b6b5b99bafe475d18d838d44fbc27d31"
192
+ dependencies = [
193
+ "bindgen",
194
+ "lazy_static",
195
+ "proc-macro2",
196
+ "quote",
197
+ "regex",
198
+ "shell-words",
199
+ "syn",
200
+ ]
201
+
202
+ [[package]]
203
+ name = "rb-sys-env"
204
+ version = "0.1.2"
205
+ source = "registry+https://github.com/rust-lang/crates.io-index"
206
+ checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
207
+
208
+ [[package]]
209
+ name = "regex"
210
+ version = "1.10.4"
211
+ source = "registry+https://github.com/rust-lang/crates.io-index"
212
+ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
213
+ dependencies = [
214
+ "aho-corasick",
215
+ "memchr",
216
+ "regex-automata",
217
+ "regex-syntax",
218
+ ]
219
+
220
+ [[package]]
221
+ name = "regex-automata"
222
+ version = "0.4.6"
223
+ source = "registry+https://github.com/rust-lang/crates.io-index"
224
+ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
225
+ dependencies = [
226
+ "aho-corasick",
227
+ "memchr",
228
+ "regex-syntax",
229
+ ]
230
+
231
+ [[package]]
232
+ name = "regex-syntax"
233
+ version = "0.8.3"
234
+ source = "registry+https://github.com/rust-lang/crates.io-index"
235
+ checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
236
+
237
+ [[package]]
238
+ name = "rustc-hash"
239
+ version = "1.1.0"
240
+ source = "registry+https://github.com/rust-lang/crates.io-index"
241
+ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
242
+
243
+ [[package]]
244
+ name = "seq-macro"
245
+ version = "0.3.5"
246
+ source = "registry+https://github.com/rust-lang/crates.io-index"
247
+ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
248
+
249
+ [[package]]
250
+ name = "shell-words"
251
+ version = "1.1.0"
252
+ source = "registry+https://github.com/rust-lang/crates.io-index"
253
+ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
254
+
255
+ [[package]]
256
+ name = "shlex"
257
+ version = "1.3.0"
258
+ source = "registry+https://github.com/rust-lang/crates.io-index"
259
+ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
260
+
261
+ [[package]]
262
+ name = "syn"
263
+ version = "2.0.58"
264
+ source = "registry+https://github.com/rust-lang/crates.io-index"
265
+ checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
266
+ dependencies = [
267
+ "proc-macro2",
268
+ "quote",
269
+ "unicode-ident",
270
+ ]
271
+
272
+ [[package]]
273
+ name = "unicode-ident"
274
+ version = "1.0.12"
275
+ source = "registry+https://github.com/rust-lang/crates.io-index"
276
+ checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
277
+
278
+ [[package]]
279
+ name = "unicode-segmentation"
280
+ version = "1.11.0"
281
+ source = "registry+https://github.com/rust-lang/crates.io-index"
282
+ checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
283
+
284
+ [[package]]
285
+ name = "windows-targets"
286
+ version = "0.52.4"
287
+ source = "registry+https://github.com/rust-lang/crates.io-index"
288
+ checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
289
+ dependencies = [
290
+ "windows_aarch64_gnullvm",
291
+ "windows_aarch64_msvc",
292
+ "windows_i686_gnu",
293
+ "windows_i686_msvc",
294
+ "windows_x86_64_gnu",
295
+ "windows_x86_64_gnullvm",
296
+ "windows_x86_64_msvc",
297
+ ]
298
+
299
+ [[package]]
300
+ name = "windows_aarch64_gnullvm"
301
+ version = "0.52.4"
302
+ source = "registry+https://github.com/rust-lang/crates.io-index"
303
+ checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
304
+
305
+ [[package]]
306
+ name = "windows_aarch64_msvc"
307
+ version = "0.52.4"
308
+ source = "registry+https://github.com/rust-lang/crates.io-index"
309
+ checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
310
+
311
+ [[package]]
312
+ name = "windows_i686_gnu"
313
+ version = "0.52.4"
314
+ source = "registry+https://github.com/rust-lang/crates.io-index"
315
+ checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
316
+
317
+ [[package]]
318
+ name = "windows_i686_msvc"
319
+ version = "0.52.4"
320
+ source = "registry+https://github.com/rust-lang/crates.io-index"
321
+ checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
322
+
323
+ [[package]]
324
+ name = "windows_x86_64_gnu"
325
+ version = "0.52.4"
326
+ source = "registry+https://github.com/rust-lang/crates.io-index"
327
+ checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
328
+
329
+ [[package]]
330
+ name = "windows_x86_64_gnullvm"
331
+ version = "0.52.4"
332
+ source = "registry+https://github.com/rust-lang/crates.io-index"
333
+ checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
334
+
335
+ [[package]]
336
+ name = "windows_x86_64_msvc"
337
+ version = "0.52.4"
338
+ source = "registry+https://github.com/rust-lang/crates.io-index"
339
+ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
340
+
341
+ [[package]]
342
+ name = "xre"
343
+ version = "0.1.0"
344
+ dependencies = [
345
+ "magnus",
346
+ "regex",
347
+ "unicode-segmentation",
348
+ ]
data/Cargo.toml ADDED
@@ -0,0 +1,10 @@
1
+ # This Cargo.toml is here to let external tools (IDEs, etc.) know that this is
2
+ # a Rust project. Your extension's dependencies should be added to the Cargo.toml
3
+ # in the ext/ directory.
4
+
5
+ [workspace]
6
+ members = ["./ext/xre"]
7
+ resolver = "2"
8
+
9
+ [profile]
10
+ release.lto = true
@@ -0,0 +1,15 @@
1
+ [package]
2
+ name = "xre"
3
+ version = "0.1.0"
4
+ edition = "2021"
5
+ authors = ["barseek <sergey.b@hey.com>"]
6
+ license = "MIT"
7
+ publish = false
8
+
9
+ [lib]
10
+ crate-type = ["cdylib"]
11
+
12
+ [dependencies]
13
+ magnus = "0.6.2"
14
+ regex = "1.10.4"
15
+ unicode-segmentation = "1.11.0"
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+ require "rb_sys/mkmf"
5
+
6
+ create_rust_makefile("xre")
@@ -0,0 +1,25 @@
1
+ mod regex_list;
2
+
3
+ use magnus::{function, method, prelude::*, Error, Ruby};
4
+ use regex_list::RegexList;
5
+
6
+ #[magnus::init]
7
+ fn init(ruby: &Ruby) -> Result<(), Error> {
8
+ let module = ruby.define_module("Xre")?;
9
+ let regex_list = module.define_class("RegexList", ruby.class_object())?;
10
+ regex_list.define_singleton_method("new", function!(RegexList::new, 1))?;
11
+ regex_list.define_singleton_method(
12
+ "build_from_strings",
13
+ function!(RegexList::build_from_strings, 1),
14
+ )?;
15
+ regex_list.define_method(
16
+ "__captures_with_context",
17
+ method!(RegexList::captures_with_context, 2),
18
+ )?;
19
+ regex_list.define_method(
20
+ "__captures_without_context",
21
+ method!(RegexList::captures_without_context, 1),
22
+ )?;
23
+ regex_list.define_method("__targets", method!(RegexList::targets, 0))?;
24
+ Ok(())
25
+ }
@@ -0,0 +1,87 @@
1
+ use std::iter::Enumerate;
2
+ use std::ops::Range;
3
+ use std::str::CharIndices;
4
+ use unicode_segmentation::UnicodeSegmentation;
5
+
6
/// Advances `iterator` to the first character whose byte offset is at least
/// `byte_start` and returns that character's index (counted in `char`s).
///
/// The iterator is consumed up to and including the character that was found,
/// so successive calls with increasing `byte_start` values continue from where
/// the previous call stopped. Returns `0` when the iterator runs out before
/// such a character is seen.
pub fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
    for (char_idx, (byte_offset, _)) in iterator.by_ref() {
        if byte_offset >= byte_start {
            return char_idx;
        }
    }
    // Exhausted without reaching `byte_start`: fall back to the default index.
    0
}
11
+
12
+ pub fn string_range_with_radius(subject: &str, range: Range<usize>, radius: usize) -> String {
13
+ let l_text = subject[..range.start]
14
+ .graphemes(true)
15
+ .rev()
16
+ .take(radius)
17
+ .take_while(|g| *g != "\n")
18
+ .collect::<String>()
19
+ .chars()
20
+ .rev()
21
+ .collect::<String>();
22
+ let r_text = subject[range.end..]
23
+ .graphemes(true)
24
+ .take(radius)
25
+ .take_while(|g| *g != "\n")
26
+ .collect::<String>();
27
+ format!("{}{}{}", l_text, &subject[range], r_text)
28
+ }
29
+
30
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `find_char_index` against a fresh iterator over `subject`.
    fn char_index_of(subject: &str, byte_start: usize) -> usize {
        let mut chars = subject.char_indices().enumerate();
        find_char_index(&mut chars, byte_start)
    }

    #[test]
    fn test_string_range_with_radius_when_subject_is_too_small() {
        // The radius exceeds what is available on both sides.
        assert_eq!(string_range_with_radius("abc", 1..2, 5), "abc");
    }

    #[test]
    fn test_string_range_with_radius() {
        let context = string_range_with_radius("abc def ghi", 4..7, 5);
        assert_eq!(context, "abc def ghi");
    }

    #[test]
    fn test_string_range_with_radius_with_non_byte_chars() {
        // Each emoji occupies four bytes but counts as one unit of radius.
        let context = string_range_with_radius("👨asdf👩asdf👧asdf👦", 13..15, 5);
        assert_eq!(context, "sdf👩asdf👧asd");
    }

    #[test]
    fn find_char_index_when_byte_exists() {
        assert_eq!(char_index_of("abc def ghi", 4), 4);
    }

    #[test]
    fn find_char_index_with_non_byte_chars() {
        // Byte 7 is the fifth character because the leading emoji is four bytes wide.
        assert_eq!(char_index_of("👨asdf👩asdf", 7), 4);
    }

    #[test]
    fn find_char_index_when_byte_does_not_exist() {
        assert_eq!(char_index_of("abc def ghi", 100), 0);
    }
}
@@ -0,0 +1,139 @@
1
+ mod utils;
2
+
3
+ use magnus::{exception, Error};
4
+ use regex::{escape, Match, Regex, RegexBuilder};
5
+ use unicode_segmentation::UnicodeSegmentation;
6
+ use utils::*;
7
+
8
+ #[magnus::wrap(class = "Xre::RegexList")]
9
+ pub struct RegexList {
10
+ targets: Vec<Regex>,
11
+ }
12
+
13
+ impl RegexList {
14
+ pub fn new(targets: Vec<String>) -> Result<Self, Error> {
15
+ Ok(Self {
16
+ targets: targets
17
+ .iter()
18
+ .map(|r| regex(r))
19
+ .collect::<Result<Vec<Regex>, Error>>()?,
20
+ })
21
+ }
22
+
23
+ pub fn build_from_strings(targets: Vec<Vec<String>>) -> Result<Self, Error> {
24
+ let targets = targets
25
+ .iter()
26
+ .map(|t| combine_into_regex_like(t))
27
+ .collect::<Vec<String>>();
28
+
29
+ Self::new(targets)
30
+ }
31
+
32
+ pub fn targets(&self) -> Vec<String> {
33
+ self.targets
34
+ .iter()
35
+ .map(|r| r.as_str().to_string())
36
+ .collect()
37
+ }
38
+
39
+ pub fn captures_with_context(
40
+ &self,
41
+ subject: String,
42
+ radius: usize,
43
+ ) -> Result<Vec<(String, usize, String)>, Error> {
44
+ Ok(self
45
+ .targets
46
+ .iter()
47
+ .map(|t| captures_with_context(t, &subject, radius))
48
+ .collect::<Result<Vec<Vec<(String, usize, String)>>, Error>>()?
49
+ .into_iter()
50
+ .flatten()
51
+ .collect())
52
+ }
53
+
54
+ pub fn captures_without_context(&self, subject: String) -> Result<Vec<(String, usize)>, Error> {
55
+ Ok(self
56
+ .targets
57
+ .iter()
58
+ .map(|t| captures_without_context(t, &subject))
59
+ .collect::<Result<Vec<Vec<(String, usize)>>, Error>>()?
60
+ .into_iter()
61
+ .flatten()
62
+ .collect())
63
+ }
64
+ }
65
+
66
+ fn fuzzy_escape(grapheme: &str) -> String {
67
+ match grapheme {
68
+ " " | "\u{00A0}" | "_" | "-" => "[ \u{00A0}_-]?".to_string(),
69
+ "'" | "‘" | "’" => "['‘’]".to_string(),
70
+ c => escape(c),
71
+ }
72
+ }
73
+
74
+ fn combine_into_regex_like(targets: &Vec<String>) -> String {
75
+ let mut targets = targets
76
+ .iter()
77
+ .map(|t| t.graphemes(true).map(|g| fuzzy_escape(g)).collect())
78
+ .collect::<Vec<String>>();
79
+
80
+ targets.sort_unstable_by(|a, b| b.len().cmp(&a.len()));
81
+
82
+ format!("(?im:({}))", targets.join("|"))
83
+ }
84
+
85
+ fn regex(target: &str) -> Result<Regex, Error> {
86
+ RegexBuilder::new(target)
87
+ .multi_line(true)
88
+ .build()
89
+ .map_err(|_| Error::new(exception::arg_error(), format!("Invalid regex: {}", target)))
90
+ }
91
+
92
+ fn captures_with_context(
93
+ target: &Regex,
94
+ subject: &str,
95
+ radius: usize,
96
+ ) -> Result<Vec<(String, usize, String)>, Error> {
97
+ let mut chars_iterator = subject.char_indices().enumerate();
98
+
99
+ let captures = captures(target, subject)?;
100
+
101
+ captures
102
+ .iter()
103
+ .map(|m| {
104
+ Ok((
105
+ m.as_str().trim().to_string(),
106
+ find_char_index(&mut chars_iterator, m.start()),
107
+ string_range_with_radius(subject, m.start()..m.end(), radius),
108
+ ))
109
+ })
110
+ .collect()
111
+ }
112
+
113
+ fn captures_without_context(target: &Regex, subject: &str) -> Result<Vec<(String, usize)>, Error> {
114
+ let mut chars_iterator = subject.char_indices().enumerate();
115
+
116
+ let captures = captures(target, subject)?;
117
+
118
+ captures
119
+ .iter()
120
+ .map(|m| {
121
+ Ok((
122
+ m.as_str().trim().to_string(),
123
+ find_char_index(&mut chars_iterator, m.start()),
124
+ ))
125
+ })
126
+ .collect()
127
+ }
128
+
129
+ fn captures<'a>(target: &'a Regex, subject: &'a str) -> Result<Vec<Match<'a>>, Error> {
130
+ target
131
+ .captures_iter(subject)
132
+ .map(|m| {
133
+ m.get(0).ok_or(Error::new(
134
+ exception::runtime_error(),
135
+ "No capture group found",
136
+ ))
137
+ })
138
+ .collect::<Result<Vec<Match>, Error>>()
139
+ }
data/lib/xre.rb ADDED
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "xre/xre"
4
+
5
module Xre
  # Ruby-facing helpers for the natively implemented regex list.
  class RegexList
    # Returns the matches of all patterns in +text+.
    #
    # @param text [String] the text to search
    # @param context_radius [Integer, nil] when given, each result also
    #   carries the text surrounding the match, limited to this radius
    # @return [Array<Array>] +[match, offset]+ entries, or
    #   +[match, offset, context]+ entries when +context_radius+ is given
    def captures(text, context_radius: nil)
      if context_radius
        __captures_with_context(text, context_radius)
      else
        __captures_without_context(text)
      end
    end

    # @return [String] a debug representation listing the pattern sources
    def inspect
      "#<Xre::RegexList targets=#{__targets}>"
    end

    # The native entry points are implementation details of this wrapper.
    private :__captures_with_context, :__captures_without_context, :__targets
  end
end