xre2 0.1.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Cargo.lock +12 -40
- data/README.md +6 -10
- data/ext/xre2/Cargo.toml +3 -2
- data/ext/xre2/src/lib.rs +192 -16
- data/lib/xre2/version.rb +1 -1
- data/tmp/arm64-darwin21/stage/Cargo.lock +647 -0
- data/tmp/arm64-darwin21/stage/ext/xre2/Cargo.toml +3 -2
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.lock +647 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.toml +7 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/ext/xre2/Cargo.toml +17 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.lock +647 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.toml +7 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/ext/xre2/Cargo.toml +17 -0
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6401693cee9fee608b66f07638eb684c95a0ee5d16519aca6d272920cc8bbe3a
|
4
|
+
data.tar.gz: 9c6a061de9b45aeb7acbc5404dd0b21c3f0f394a3f54632e993a335b6a6da5de
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8feee07ba3c54a1ffda829a7b4903dd9ef9dc909dd8ba87b1c5cfffa3ee06a75ae712ae71da69ade4cb14f193e7d8f9f78a8d0696cb3e72bef3e583b07c85677
|
7
|
+
data.tar.gz: 757d878581016a0f3299b2d4c677e4c4db129ff864469fba7f24b91b147066c461e1caa1c607e8981487768e0cf538cbe63618108c9d77fc21defca8109d22f1
|
data/Cargo.lock
CHANGED
@@ -106,10 +106,6 @@ name = "cc"
|
|
106
106
|
version = "1.0.90"
|
107
107
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
108
108
|
checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5"
|
109
|
-
dependencies = [
|
110
|
-
"jobserver",
|
111
|
-
"libc",
|
112
|
-
]
|
113
109
|
|
114
110
|
[[package]]
|
115
111
|
name = "cexpr"
|
@@ -231,15 +227,6 @@ dependencies = [
|
|
231
227
|
"either",
|
232
228
|
]
|
233
229
|
|
234
|
-
[[package]]
|
235
|
-
name = "jobserver"
|
236
|
-
version = "0.1.28"
|
237
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
238
|
-
checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6"
|
239
|
-
dependencies = [
|
240
|
-
"libc",
|
241
|
-
]
|
242
|
-
|
243
230
|
[[package]]
|
244
231
|
name = "lazy_static"
|
245
232
|
version = "1.4.0"
|
@@ -282,9 +269,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
|
282
269
|
|
283
270
|
[[package]]
|
284
271
|
name = "magnus"
|
285
|
-
version = "0.6.
|
272
|
+
version = "0.6.3"
|
286
273
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
287
|
-
checksum = "
|
274
|
+
checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
|
288
275
|
dependencies = [
|
289
276
|
"magnus-macros",
|
290
277
|
"rb-sys",
|
@@ -354,28 +341,6 @@ dependencies = [
|
|
354
341
|
"pkg-config",
|
355
342
|
]
|
356
343
|
|
357
|
-
[[package]]
|
358
|
-
name = "pcre2"
|
359
|
-
version = "0.2.7"
|
360
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
361
|
-
checksum = "5ea92ff5eabd27703ab12cefe01b08b2809ec3dc75fdc69d4e6b75fbce0cbd67"
|
362
|
-
dependencies = [
|
363
|
-
"libc",
|
364
|
-
"log",
|
365
|
-
"pcre2-sys",
|
366
|
-
]
|
367
|
-
|
368
|
-
[[package]]
|
369
|
-
name = "pcre2-sys"
|
370
|
-
version = "0.2.9"
|
371
|
-
source = "registry+https://github.com/rust-lang/crates.io-index"
|
372
|
-
checksum = "550f5d18fb1b90c20b87e161852c10cde77858c3900c5059b5ad2a1449f11d8a"
|
373
|
-
dependencies = [
|
374
|
-
"cc",
|
375
|
-
"libc",
|
376
|
-
"pkg-config",
|
377
|
-
]
|
378
|
-
|
379
344
|
[[package]]
|
380
345
|
name = "peeking_take_while"
|
381
346
|
version = "0.1.2"
|
@@ -510,9 +475,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
|
510
475
|
|
511
476
|
[[package]]
|
512
477
|
name = "syn"
|
513
|
-
version = "2.0.
|
478
|
+
version = "2.0.57"
|
514
479
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
515
|
-
checksum = "
|
480
|
+
checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35"
|
516
481
|
dependencies = [
|
517
482
|
"proc-macro2",
|
518
483
|
"quote",
|
@@ -543,6 +508,12 @@ version = "1.0.12"
|
|
543
508
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
544
509
|
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
545
510
|
|
511
|
+
[[package]]
|
512
|
+
name = "unicode-segmentation"
|
513
|
+
version = "1.11.0"
|
514
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
515
|
+
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
516
|
+
|
546
517
|
[[package]]
|
547
518
|
name = "unicode-width"
|
548
519
|
version = "0.1.11"
|
@@ -671,5 +642,6 @@ dependencies = [
|
|
671
642
|
"fancy-regex",
|
672
643
|
"magnus",
|
673
644
|
"onig",
|
674
|
-
"
|
645
|
+
"regex",
|
646
|
+
"unicode-segmentation",
|
675
647
|
]
|
data/README.md
CHANGED
@@ -1,24 +1,20 @@
|
|
1
1
|
# Xre2
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/xre2`. To experiment with that code, run `bin/console` for an interactive prompt.
|
3
|
+
some rust regexes for ruby
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
9
|
-
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
10
|
-
|
11
7
|
Install the gem and add to the application's Gemfile by executing:
|
12
8
|
|
13
|
-
$ bundle add
|
9
|
+
$ bundle add xre2
|
14
10
|
|
15
11
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
16
12
|
|
17
|
-
$ gem install
|
13
|
+
$ gem install xre2
|
18
14
|
|
19
15
|
## Usage
|
20
16
|
|
21
|
-
|
17
|
+
todo
|
22
18
|
|
23
19
|
## Development
|
24
20
|
|
@@ -28,7 +24,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
28
24
|
|
29
25
|
## Contributing
|
30
26
|
|
31
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
27
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/vagab/xre2. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/vagab/xre2/blob/main/CODE_OF_CONDUCT.md).
|
32
28
|
|
33
29
|
## License
|
34
30
|
|
@@ -36,4 +32,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
36
32
|
|
37
33
|
## Code of Conduct
|
38
34
|
|
39
|
-
Everyone interacting in the Xre2 project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
35
|
+
Everyone interacting in the Xre2 project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/vagab/xre2/blob/main/CODE_OF_CONDUCT.md).
|
data/ext/xre2/Cargo.toml
CHANGED
data/ext/xre2/src/lib.rs
CHANGED
@@ -1,34 +1,196 @@
|
|
1
1
|
use magnus::{function, prelude::*, Error, Ruby};
|
2
|
-
use
|
2
|
+
use std::iter::Enumerate;
|
3
|
+
use std::ops::Range;
|
4
|
+
use std::str::CharIndices;
|
5
|
+
use unicode_segmentation::UnicodeSegmentation;
|
6
|
+
|
7
|
+
#[magnus::wrap(class = "Regexx")]
|
8
|
+
struct Regex {
|
9
|
+
regex: regex::Regex,
|
10
|
+
}
|
11
|
+
|
12
|
+
#[magnus::wrap(class = "RegexxList")]
|
13
|
+
struct RegexList {
|
14
|
+
regexes: Vec<regex::Regex>,
|
15
|
+
}
|
16
|
+
|
17
|
+
impl RegexList {
|
18
|
+
fn new(regexes: Vec<String>) -> Self {
|
19
|
+
Self {
|
20
|
+
regexes: regexes.iter().map(|r| regex(r.to_string()).regex).collect(),
|
21
|
+
}
|
22
|
+
}
|
23
|
+
}
|
24
|
+
|
25
|
+
fn regex(target: String) -> Regex {
|
26
|
+
let regex = regex::RegexBuilder::new(&target)
|
27
|
+
.multi_line(true)
|
28
|
+
.build()
|
29
|
+
.unwrap();
|
30
|
+
|
31
|
+
Regex { regex }
|
32
|
+
}
|
33
|
+
|
34
|
+
fn reg_array(target: &RegexList, subject: String) -> Vec<(String, usize)> {
|
35
|
+
target
|
36
|
+
.regexes
|
37
|
+
.iter()
|
38
|
+
.map(|t| reg_compiled_array(t, &subject))
|
39
|
+
.flatten()
|
40
|
+
.collect()
|
41
|
+
}
|
42
|
+
|
43
|
+
fn reg_array_with_context(target: &RegexList, subject: String) -> Vec<(String, usize, String)> {
|
44
|
+
target
|
45
|
+
.regexes
|
46
|
+
.iter()
|
47
|
+
.map(|t| reg_compiled_array_with_context(t, &subject))
|
48
|
+
.flatten()
|
49
|
+
.collect()
|
50
|
+
}
|
51
|
+
|
52
|
+
fn reg_compiled_array(target: ®ex::Regex, subject: &str) -> Vec<(String, usize)> {
|
53
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
54
|
+
|
55
|
+
target
|
56
|
+
.captures_iter(subject)
|
57
|
+
.map(|m| m.get(1).unwrap())
|
58
|
+
.map(|m| {
|
59
|
+
(
|
60
|
+
m.as_str().trim().to_string(),
|
61
|
+
find_char_index(&mut chars_iterator, m.start()),
|
62
|
+
)
|
63
|
+
})
|
64
|
+
.collect()
|
65
|
+
}
|
66
|
+
|
67
|
+
fn reg_compiled_array_with_context(
|
68
|
+
target: ®ex::Regex,
|
69
|
+
subject: &str,
|
70
|
+
) -> Vec<(String, usize, String)> {
|
71
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
72
|
+
|
73
|
+
target
|
74
|
+
.captures_iter(subject)
|
75
|
+
.map(|m| m.get(1).unwrap())
|
76
|
+
.map(|m| {
|
77
|
+
(
|
78
|
+
m.as_str().trim().to_string(),
|
79
|
+
find_char_index(&mut chars_iterator, m.start()),
|
80
|
+
extract_from_str(&subject, m.start()..m.end(), 10),
|
81
|
+
)
|
82
|
+
})
|
83
|
+
.collect()
|
84
|
+
}
|
85
|
+
|
86
|
+
fn reg_compiled(target: &Regex, subject: String) -> Vec<(String, usize)> {
|
87
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
88
|
+
|
89
|
+
target
|
90
|
+
.regex
|
91
|
+
.captures_iter(&subject)
|
92
|
+
.map(|m| m.get(1).unwrap())
|
93
|
+
.map(|m| {
|
94
|
+
(
|
95
|
+
m.as_str().trim().to_string(),
|
96
|
+
find_char_index(&mut chars_iterator, m.start()),
|
97
|
+
)
|
98
|
+
})
|
99
|
+
.collect()
|
100
|
+
}
|
101
|
+
|
102
|
+
fn reg_compiled_with_context(target: &Regex, subject: String) -> Vec<(String, usize, String)> {
|
103
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
104
|
+
|
105
|
+
target
|
106
|
+
.regex
|
107
|
+
.captures_iter(&subject)
|
108
|
+
.map(|m| m.get(1).unwrap())
|
109
|
+
.map(|m| {
|
110
|
+
(
|
111
|
+
m.as_str().trim().to_string(),
|
112
|
+
find_char_index(&mut chars_iterator, m.start()),
|
113
|
+
extract_from_str(&subject, m.start()..m.end(), 10),
|
114
|
+
)
|
115
|
+
})
|
116
|
+
.collect()
|
117
|
+
}
|
118
|
+
|
119
|
+
/// Converts the byte offset `byte_start` into a character index, using
/// `iterator` as a forward-only cursor over the subject.
///
/// The cursor is advanced past characters that start before `byte_start`, so a
/// sequence of lookups with non-decreasing offsets scans the subject only once.
/// The character found is *not* consumed: asking for the same offset again
/// yields the same index.
///
/// Returns:
/// - the index of the first remaining character whose byte offset is
///   `>= byte_start`;
/// - the total character count when `byte_start` lies at or past the end of a
///   non-empty subject;
/// - `0` when the cursor has nothing left (e.g. an empty subject).
fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
    loop {
        // Peek through a cheap clone so the cursor only moves when we decide to.
        let mut lookahead = iterator.clone();
        match lookahead.next() {
            None => return 0,
            Some((char_index, (byte_index, _))) if byte_index >= byte_start => return char_index,
            Some((char_index, _)) => {
                // The last character is never consumed: it is the only way to
                // know the character count once the offset points at the end.
                if lookahead.clone().next().is_none() {
                    return char_index + 1;
                }
                *iterator = lookahead;
            }
        }
    }
}
|
124
|
+
|
125
|
+
/// Returns the characters of `vec` covered by `range`, widened by up to
/// `radius` characters on each side, as a `String`.
///
/// `range` and `radius` are measured in characters (indices into `vec`). The
/// window is clamped to the bounds of `vec`, so an empty slice or a range that
/// lies past the end yields an empty or shortened string instead of panicking.
fn extract_from_vec(vec: &[char], range: Range<usize>, radius: usize) -> String {
    // `end` is an exclusive bound, so it may equal `vec.len()`.
    let end = range.end.saturating_add(radius).min(vec.len());
    // Keep `start <= end` even when the requested range starts past the end.
    let start = range.start.saturating_sub(radius).min(end);

    vec[start..end].iter().collect()
}
|
130
|
+
|
131
|
+
fn extract_from_str(subject: &str, range: Range<usize>, radius: usize) -> String {
|
132
|
+
let l_text = subject[..range.start]
|
133
|
+
.graphemes(true)
|
134
|
+
.rev()
|
135
|
+
.take(radius)
|
136
|
+
.collect::<String>()
|
137
|
+
.chars()
|
138
|
+
.rev()
|
139
|
+
.collect::<String>();
|
140
|
+
let r_text = subject[range.end..]
|
141
|
+
.graphemes(true)
|
142
|
+
.skip(1)
|
143
|
+
.take(radius)
|
144
|
+
.collect::<String>();
|
145
|
+
format!("{}{}{}", l_text, &subject[range], r_text)
|
146
|
+
}
|
147
|
+
|
148
|
+
fn onig(target: String, subject: String) -> Vec<(String, usize)> {
|
149
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
3
150
|
|
4
|
-
fn onig(target: String, subject: String) -> Vec<(usize, usize)> {
|
5
151
|
onig::Regex::new(&target)
|
6
152
|
.unwrap()
|
7
153
|
.find_iter(&subject)
|
154
|
+
.map(|m| {
|
155
|
+
(
|
156
|
+
subject[m.0..m.1].to_string(),
|
157
|
+
find_char_index(&mut chars_iterator, m.0),
|
158
|
+
)
|
159
|
+
})
|
8
160
|
.collect()
|
9
161
|
}
|
10
162
|
|
11
|
-
fn
|
12
|
-
|
13
|
-
|
163
|
+
fn reg(target: String, subject: String) -> Vec<(String, usize)> {
|
164
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
165
|
+
|
166
|
+
regex::RegexBuilder::new(&target)
|
167
|
+
// .multi_line(true)
|
14
168
|
.build()
|
15
169
|
.unwrap()
|
16
|
-
.
|
17
|
-
.map(|m| m.unwrap())
|
18
|
-
.map(|m|
|
170
|
+
.captures_iter(&subject)
|
171
|
+
.map(|m| m.get(1).unwrap())
|
172
|
+
.map(|m| {
|
173
|
+
(
|
174
|
+
m.as_str().trim().to_string(),
|
175
|
+
find_char_index(&mut chars_iterator, m.start()),
|
176
|
+
)
|
177
|
+
})
|
19
178
|
.collect()
|
20
179
|
}
|
21
180
|
|
22
|
-
fn
|
23
|
-
|
24
|
-
|
181
|
+
fn fancy(target: String, subject: String) -> Vec<(String, usize)> {
|
182
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
183
|
+
|
184
|
+
fancy_regex::RegexBuilder::new(&target)
|
185
|
+
.backtrack_limit(100_000_000)
|
186
|
+
.build()
|
25
187
|
.unwrap()
|
26
|
-
.
|
27
|
-
.map(|m| m.unwrap())
|
188
|
+
.captures_iter(&subject)
|
189
|
+
.map(|m| m.unwrap().iter().nth(1).unwrap().unwrap())
|
28
190
|
.map(|m| {
|
29
191
|
(
|
30
|
-
|
31
|
-
m.start()
|
192
|
+
m.as_str().trim().to_string(),
|
193
|
+
find_char_index(&mut chars_iterator, m.start()),
|
32
194
|
)
|
33
195
|
})
|
34
196
|
.collect()
|
@@ -37,8 +199,22 @@ fn pcre2(target: String, subject: String) -> Vec<(String, usize)> {
|
|
37
199
|
#[magnus::init]
|
38
200
|
fn init(ruby: &Ruby) -> Result<(), Error> {
|
39
201
|
let module = ruby.define_module("Xre2")?;
|
202
|
+
ruby.define_class("Regexx", ruby.class_object())?;
|
203
|
+
let regexxlist = ruby.define_class("RegexxList", ruby.class_object())?;
|
204
|
+
regexxlist.define_singleton_method("new", function!(RegexList::new, 1))?;
|
205
|
+
module.define_singleton_method("regex", function!(regex, 1))?;
|
40
206
|
module.define_singleton_method("fancy", function!(fancy, 2))?;
|
41
207
|
module.define_singleton_method("onig", function!(onig, 2))?;
|
42
|
-
module.define_singleton_method("
|
208
|
+
module.define_singleton_method("reg", function!(reg, 2))?;
|
209
|
+
module.define_singleton_method("reg_array", function!(reg_array, 2))?;
|
210
|
+
module.define_singleton_method("reg_compiled", function!(reg_compiled, 2))?;
|
211
|
+
module.define_singleton_method(
|
212
|
+
"reg_compiled_with_context",
|
213
|
+
function!(reg_compiled_with_context, 2),
|
214
|
+
)?;
|
215
|
+
module.define_singleton_method(
|
216
|
+
"reg_array_with_context",
|
217
|
+
function!(reg_array_with_context, 2),
|
218
|
+
)?;
|
43
219
|
Ok(())
|
44
220
|
}
|
data/lib/xre2/version.rb
CHANGED