xre 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Cargo.lock +348 -0
- data/Cargo.toml +10 -0
- data/ext/xre/Cargo.toml +15 -0
- data/ext/xre/extconf.rb +6 -0
- data/ext/xre/src/lib.rs +25 -0
- data/ext/xre/src/regex_list/utils.rs +87 -0
- data/ext/xre/src/regex_list.rs +139 -0
- data/lib/xre.rb +21 -0
- data/tmp/arm64-darwin21/stage/Cargo.lock +348 -0
- data/tmp/arm64-darwin21/stage/Cargo.toml +10 -0
- data/tmp/arm64-darwin21/stage/ext/xre/Cargo.toml +15 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.lock +348 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.toml +10 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/ext/xre/Cargo.toml +15 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.lock +348 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/Cargo.toml +10 -0
- data/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/tmp/arm64-darwin21/stage/ext/xre/Cargo.toml +15 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 6f016c5f913f154b6bb94e3bd8ebe17e01c540ba11b53b58e57aa1ce3198ec20
|
4
|
+
data.tar.gz: dca10ca30f92d2530a85b9d90a7e42a36d3324bd1447499b779fcd9be8cf925e
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 4527e85bdb47b7c2425e3bf3df994c6f30f2f3ea221f6dbd87d11bab361836487fe06bb53c9103c398486028f2aa62ed28f0881e5b5b8e3ab89fd8c1a3bee8f4
|
7
|
+
data.tar.gz: 7ea058c2c310383719a1a835383c4ceb3ec3c82416089076082674613a4ddaa5606ff4f9a8e5a4295d518e60655fcd104549c6923c74b72182450693ce857116
|
data/Cargo.lock
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
# This file is automatically @generated by Cargo.
|
2
|
+
# It is not intended for manual editing.
|
3
|
+
version = 3
|
4
|
+
|
5
|
+
[[package]]
|
6
|
+
name = "aho-corasick"
|
7
|
+
version = "1.1.3"
|
8
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
9
|
+
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
10
|
+
dependencies = [
|
11
|
+
"memchr",
|
12
|
+
]
|
13
|
+
|
14
|
+
[[package]]
|
15
|
+
name = "bindgen"
|
16
|
+
version = "0.69.4"
|
17
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
18
|
+
checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0"
|
19
|
+
dependencies = [
|
20
|
+
"bitflags",
|
21
|
+
"cexpr",
|
22
|
+
"clang-sys",
|
23
|
+
"itertools",
|
24
|
+
"lazy_static",
|
25
|
+
"lazycell",
|
26
|
+
"proc-macro2",
|
27
|
+
"quote",
|
28
|
+
"regex",
|
29
|
+
"rustc-hash",
|
30
|
+
"shlex",
|
31
|
+
"syn",
|
32
|
+
]
|
33
|
+
|
34
|
+
[[package]]
|
35
|
+
name = "bitflags"
|
36
|
+
version = "2.5.0"
|
37
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
38
|
+
checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
|
39
|
+
|
40
|
+
[[package]]
|
41
|
+
name = "cexpr"
|
42
|
+
version = "0.6.0"
|
43
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
44
|
+
checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
|
45
|
+
dependencies = [
|
46
|
+
"nom",
|
47
|
+
]
|
48
|
+
|
49
|
+
[[package]]
|
50
|
+
name = "cfg-if"
|
51
|
+
version = "1.0.0"
|
52
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
53
|
+
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
54
|
+
|
55
|
+
[[package]]
|
56
|
+
name = "clang-sys"
|
57
|
+
version = "1.7.0"
|
58
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
59
|
+
checksum = "67523a3b4be3ce1989d607a828d036249522dd9c1c8de7f4dd2dae43a37369d1"
|
60
|
+
dependencies = [
|
61
|
+
"glob",
|
62
|
+
"libc",
|
63
|
+
"libloading",
|
64
|
+
]
|
65
|
+
|
66
|
+
[[package]]
|
67
|
+
name = "either"
|
68
|
+
version = "1.10.0"
|
69
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
70
|
+
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
|
71
|
+
|
72
|
+
[[package]]
|
73
|
+
name = "glob"
|
74
|
+
version = "0.3.1"
|
75
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
76
|
+
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
77
|
+
|
78
|
+
[[package]]
|
79
|
+
name = "itertools"
|
80
|
+
version = "0.12.1"
|
81
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
82
|
+
checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
|
83
|
+
dependencies = [
|
84
|
+
"either",
|
85
|
+
]
|
86
|
+
|
87
|
+
[[package]]
|
88
|
+
name = "lazy_static"
|
89
|
+
version = "1.4.0"
|
90
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
91
|
+
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
92
|
+
|
93
|
+
[[package]]
|
94
|
+
name = "lazycell"
|
95
|
+
version = "1.3.0"
|
96
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
97
|
+
checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
|
98
|
+
|
99
|
+
[[package]]
|
100
|
+
name = "libc"
|
101
|
+
version = "0.2.153"
|
102
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
103
|
+
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
104
|
+
|
105
|
+
[[package]]
|
106
|
+
name = "libloading"
|
107
|
+
version = "0.8.3"
|
108
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
109
|
+
checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19"
|
110
|
+
dependencies = [
|
111
|
+
"cfg-if",
|
112
|
+
"windows-targets",
|
113
|
+
]
|
114
|
+
|
115
|
+
[[package]]
|
116
|
+
name = "magnus"
|
117
|
+
version = "0.6.3"
|
118
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
119
|
+
checksum = "0fc7a31fb0b64761e3cd09a6975577601fccc5f08b8fc9245064fc4f71ed6a9d"
|
120
|
+
dependencies = [
|
121
|
+
"magnus-macros",
|
122
|
+
"rb-sys",
|
123
|
+
"rb-sys-env",
|
124
|
+
"seq-macro",
|
125
|
+
]
|
126
|
+
|
127
|
+
[[package]]
|
128
|
+
name = "magnus-macros"
|
129
|
+
version = "0.6.0"
|
130
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
131
|
+
checksum = "5968c820e2960565f647819f5928a42d6e874551cab9d88d75e3e0660d7f71e3"
|
132
|
+
dependencies = [
|
133
|
+
"proc-macro2",
|
134
|
+
"quote",
|
135
|
+
"syn",
|
136
|
+
]
|
137
|
+
|
138
|
+
[[package]]
|
139
|
+
name = "memchr"
|
140
|
+
version = "2.7.2"
|
141
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
142
|
+
checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
|
143
|
+
|
144
|
+
[[package]]
|
145
|
+
name = "minimal-lexical"
|
146
|
+
version = "0.2.1"
|
147
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
148
|
+
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
|
149
|
+
|
150
|
+
[[package]]
|
151
|
+
name = "nom"
|
152
|
+
version = "7.1.3"
|
153
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
154
|
+
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
|
155
|
+
dependencies = [
|
156
|
+
"memchr",
|
157
|
+
"minimal-lexical",
|
158
|
+
]
|
159
|
+
|
160
|
+
[[package]]
|
161
|
+
name = "proc-macro2"
|
162
|
+
version = "1.0.79"
|
163
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
164
|
+
checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e"
|
165
|
+
dependencies = [
|
166
|
+
"unicode-ident",
|
167
|
+
]
|
168
|
+
|
169
|
+
[[package]]
|
170
|
+
name = "quote"
|
171
|
+
version = "1.0.36"
|
172
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
173
|
+
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
|
174
|
+
dependencies = [
|
175
|
+
"proc-macro2",
|
176
|
+
]
|
177
|
+
|
178
|
+
[[package]]
|
179
|
+
name = "rb-sys"
|
180
|
+
version = "0.9.91"
|
181
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
182
|
+
checksum = "eb81203e271055178603e243fee397f5f4aac125bcd20036279683fb1445a899"
|
183
|
+
dependencies = [
|
184
|
+
"rb-sys-build",
|
185
|
+
]
|
186
|
+
|
187
|
+
[[package]]
|
188
|
+
name = "rb-sys-build"
|
189
|
+
version = "0.9.91"
|
190
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
191
|
+
checksum = "9de9403a6aac834e7c9534575cb14188b6b5b99bafe475d18d838d44fbc27d31"
|
192
|
+
dependencies = [
|
193
|
+
"bindgen",
|
194
|
+
"lazy_static",
|
195
|
+
"proc-macro2",
|
196
|
+
"quote",
|
197
|
+
"regex",
|
198
|
+
"shell-words",
|
199
|
+
"syn",
|
200
|
+
]
|
201
|
+
|
202
|
+
[[package]]
|
203
|
+
name = "rb-sys-env"
|
204
|
+
version = "0.1.2"
|
205
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
206
|
+
checksum = "a35802679f07360454b418a5d1735c89716bde01d35b1560fc953c1415a0b3bb"
|
207
|
+
|
208
|
+
[[package]]
|
209
|
+
name = "regex"
|
210
|
+
version = "1.10.4"
|
211
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
212
|
+
checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
|
213
|
+
dependencies = [
|
214
|
+
"aho-corasick",
|
215
|
+
"memchr",
|
216
|
+
"regex-automata",
|
217
|
+
"regex-syntax",
|
218
|
+
]
|
219
|
+
|
220
|
+
[[package]]
|
221
|
+
name = "regex-automata"
|
222
|
+
version = "0.4.6"
|
223
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
224
|
+
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
225
|
+
dependencies = [
|
226
|
+
"aho-corasick",
|
227
|
+
"memchr",
|
228
|
+
"regex-syntax",
|
229
|
+
]
|
230
|
+
|
231
|
+
[[package]]
|
232
|
+
name = "regex-syntax"
|
233
|
+
version = "0.8.3"
|
234
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
235
|
+
checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56"
|
236
|
+
|
237
|
+
[[package]]
|
238
|
+
name = "rustc-hash"
|
239
|
+
version = "1.1.0"
|
240
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
241
|
+
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
242
|
+
|
243
|
+
[[package]]
|
244
|
+
name = "seq-macro"
|
245
|
+
version = "0.3.5"
|
246
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
247
|
+
checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
|
248
|
+
|
249
|
+
[[package]]
|
250
|
+
name = "shell-words"
|
251
|
+
version = "1.1.0"
|
252
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
253
|
+
checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde"
|
254
|
+
|
255
|
+
[[package]]
|
256
|
+
name = "shlex"
|
257
|
+
version = "1.3.0"
|
258
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
259
|
+
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
260
|
+
|
261
|
+
[[package]]
|
262
|
+
name = "syn"
|
263
|
+
version = "2.0.58"
|
264
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
265
|
+
checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687"
|
266
|
+
dependencies = [
|
267
|
+
"proc-macro2",
|
268
|
+
"quote",
|
269
|
+
"unicode-ident",
|
270
|
+
]
|
271
|
+
|
272
|
+
[[package]]
|
273
|
+
name = "unicode-ident"
|
274
|
+
version = "1.0.12"
|
275
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
276
|
+
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
277
|
+
|
278
|
+
[[package]]
|
279
|
+
name = "unicode-segmentation"
|
280
|
+
version = "1.11.0"
|
281
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
282
|
+
checksum = "d4c87d22b6e3f4a18d4d40ef354e97c90fcb14dd91d7dc0aa9d8a1172ebf7202"
|
283
|
+
|
284
|
+
[[package]]
|
285
|
+
name = "windows-targets"
|
286
|
+
version = "0.52.4"
|
287
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
288
|
+
checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b"
|
289
|
+
dependencies = [
|
290
|
+
"windows_aarch64_gnullvm",
|
291
|
+
"windows_aarch64_msvc",
|
292
|
+
"windows_i686_gnu",
|
293
|
+
"windows_i686_msvc",
|
294
|
+
"windows_x86_64_gnu",
|
295
|
+
"windows_x86_64_gnullvm",
|
296
|
+
"windows_x86_64_msvc",
|
297
|
+
]
|
298
|
+
|
299
|
+
[[package]]
|
300
|
+
name = "windows_aarch64_gnullvm"
|
301
|
+
version = "0.52.4"
|
302
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
303
|
+
checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9"
|
304
|
+
|
305
|
+
[[package]]
|
306
|
+
name = "windows_aarch64_msvc"
|
307
|
+
version = "0.52.4"
|
308
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
309
|
+
checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675"
|
310
|
+
|
311
|
+
[[package]]
|
312
|
+
name = "windows_i686_gnu"
|
313
|
+
version = "0.52.4"
|
314
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
315
|
+
checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3"
|
316
|
+
|
317
|
+
[[package]]
|
318
|
+
name = "windows_i686_msvc"
|
319
|
+
version = "0.52.4"
|
320
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
321
|
+
checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02"
|
322
|
+
|
323
|
+
[[package]]
|
324
|
+
name = "windows_x86_64_gnu"
|
325
|
+
version = "0.52.4"
|
326
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
327
|
+
checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03"
|
328
|
+
|
329
|
+
[[package]]
|
330
|
+
name = "windows_x86_64_gnullvm"
|
331
|
+
version = "0.52.4"
|
332
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
333
|
+
checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177"
|
334
|
+
|
335
|
+
[[package]]
|
336
|
+
name = "windows_x86_64_msvc"
|
337
|
+
version = "0.52.4"
|
338
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
339
|
+
checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
|
340
|
+
|
341
|
+
[[package]]
|
342
|
+
name = "xre"
|
343
|
+
version = "0.1.0"
|
344
|
+
dependencies = [
|
345
|
+
"magnus",
|
346
|
+
"regex",
|
347
|
+
"unicode-segmentation",
|
348
|
+
]
|
data/Cargo.toml
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# This Cargo.toml is here to let external tools (IDEs, etc.) know that this is
|
2
|
+
# a Rust project. Your extension's dependencies should be added to the Cargo.toml
|
3
|
+
# in the ext/ directory.
|
4
|
+
|
5
|
+
[workspace]
|
6
|
+
members = ["./ext/xre"]
|
7
|
+
resolver = "2"
|
8
|
+
|
9
|
+
[profile]
|
10
|
+
release.lto = true
|
data/ext/xre/Cargo.toml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
[package]
|
2
|
+
name = "xre"
|
3
|
+
version = "0.1.0"
|
4
|
+
edition = "2021"
|
5
|
+
authors = ["barseek <sergey.b@hey.com>"]
|
6
|
+
license = "MIT"
|
7
|
+
publish = false
|
8
|
+
|
9
|
+
[lib]
|
10
|
+
crate-type = ["cdylib"]
|
11
|
+
|
12
|
+
[dependencies]
|
13
|
+
magnus = "0.6.2"
|
14
|
+
regex = "1.10.4"
|
15
|
+
unicode-segmentation = "1.11.0"
|
data/ext/xre/extconf.rb
ADDED
data/ext/xre/src/lib.rs
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
mod regex_list;
|
2
|
+
|
3
|
+
use magnus::{function, method, prelude::*, Error, Ruby};
|
4
|
+
use regex_list::RegexList;
|
5
|
+
|
6
|
+
#[magnus::init]
|
7
|
+
fn init(ruby: &Ruby) -> Result<(), Error> {
|
8
|
+
let module = ruby.define_module("Xre")?;
|
9
|
+
let regex_list = module.define_class("RegexList", ruby.class_object())?;
|
10
|
+
regex_list.define_singleton_method("new", function!(RegexList::new, 1))?;
|
11
|
+
regex_list.define_singleton_method(
|
12
|
+
"build_from_strings",
|
13
|
+
function!(RegexList::build_from_strings, 1),
|
14
|
+
)?;
|
15
|
+
regex_list.define_method(
|
16
|
+
"__captures_with_context",
|
17
|
+
method!(RegexList::captures_with_context, 2),
|
18
|
+
)?;
|
19
|
+
regex_list.define_method(
|
20
|
+
"__captures_without_context",
|
21
|
+
method!(RegexList::captures_without_context, 1),
|
22
|
+
)?;
|
23
|
+
regex_list.define_method("__targets", method!(RegexList::targets, 0))?;
|
24
|
+
Ok(())
|
25
|
+
}
|
@@ -0,0 +1,87 @@
|
|
1
|
+
use std::iter::Enumerate;
|
2
|
+
use std::ops::Range;
|
3
|
+
use std::str::CharIndices;
|
4
|
+
use unicode_segmentation::UnicodeSegmentation;
|
5
|
+
|
6
|
+
/// Advances `iterator` until it reaches the first character whose byte offset
/// is at least `byte_start`, and returns that character's index.
///
/// The iterator is consumed up to and including the character that is
/// returned, so successive calls with increasing `byte_start` values resume
/// where the previous call stopped instead of rescanning from the beginning.
///
/// Returns `0` when the iterator is exhausted without reaching `byte_start`.
pub fn find_char_index(iterator: &mut Enumerate<CharIndices>, byte_start: usize) -> usize {
    for (char_position, (byte_position, _)) in iterator.by_ref() {
        if byte_position >= byte_start {
            return char_position;
        }
    }

    // Nothing at or beyond `byte_start` was left in the iterator.
    0
}
|
11
|
+
|
12
|
+
pub fn string_range_with_radius(subject: &str, range: Range<usize>, radius: usize) -> String {
|
13
|
+
let l_text = subject[..range.start]
|
14
|
+
.graphemes(true)
|
15
|
+
.rev()
|
16
|
+
.take(radius)
|
17
|
+
.take_while(|g| *g != "\n")
|
18
|
+
.collect::<String>()
|
19
|
+
.chars()
|
20
|
+
.rev()
|
21
|
+
.collect::<String>();
|
22
|
+
let r_text = subject[range.end..]
|
23
|
+
.graphemes(true)
|
24
|
+
.take(radius)
|
25
|
+
.take_while(|g| *g != "\n")
|
26
|
+
.collect::<String>();
|
27
|
+
format!("{}{}{}", l_text, &subject[range], r_text)
|
28
|
+
}
|
29
|
+
|
30
|
+
#[cfg(test)]
mod tests {
    use super::*;

    // A radius larger than the available text yields the whole subject.
    #[test]
    fn test_string_range_with_radius_when_subject_is_too_small() {
        let snippet = string_range_with_radius("abc", 1..2, 5);
        assert_eq!(snippet, "abc");
    }

    // Context is added on both sides of the matched range.
    #[test]
    fn test_string_range_with_radius() {
        let snippet = string_range_with_radius("abc def ghi", 4..7, 5);
        assert_eq!(snippet, "abc def ghi");
    }

    // The radius counts graphemes, not bytes, so multi-byte emoji count once.
    #[test]
    fn test_string_range_with_radius_with_non_byte_chars() {
        let snippet = string_range_with_radius("👨asdf👩asdf👧asdf👦", 13..15, 5);
        assert_eq!(snippet, "sdf👩asdf👧asd");
    }

    // For ASCII input the char index equals the byte offset.
    #[test]
    fn find_char_index_when_byte_exists() {
        let mut cursor = "abc def ghi".char_indices().enumerate();
        assert_eq!(find_char_index(&mut cursor, 4), 4);
    }

    // A leading 4-byte emoji shifts byte offsets but counts as one char.
    #[test]
    fn find_char_index_with_non_byte_chars() {
        let mut cursor = "👨asdf👩asdf".char_indices().enumerate();
        assert_eq!(find_char_index(&mut cursor, 7), 4);
    }

    // An offset past the end of the text falls back to 0.
    #[test]
    fn find_char_index_when_byte_does_not_exist() {
        let mut cursor = "abc def ghi".char_indices().enumerate();
        assert_eq!(find_char_index(&mut cursor, 100), 0)
    }
}
|
@@ -0,0 +1,139 @@
|
|
1
|
+
mod utils;
|
2
|
+
|
3
|
+
use magnus::{exception, Error};
|
4
|
+
use regex::{escape, Match, Regex, RegexBuilder};
|
5
|
+
use unicode_segmentation::UnicodeSegmentation;
|
6
|
+
use utils::*;
|
7
|
+
|
8
|
+
#[magnus::wrap(class = "Xre::RegexList")]
|
9
|
+
pub struct RegexList {
|
10
|
+
targets: Vec<Regex>,
|
11
|
+
}
|
12
|
+
|
13
|
+
impl RegexList {
|
14
|
+
pub fn new(targets: Vec<String>) -> Result<Self, Error> {
|
15
|
+
Ok(Self {
|
16
|
+
targets: targets
|
17
|
+
.iter()
|
18
|
+
.map(|r| regex(r))
|
19
|
+
.collect::<Result<Vec<Regex>, Error>>()?,
|
20
|
+
})
|
21
|
+
}
|
22
|
+
|
23
|
+
pub fn build_from_strings(targets: Vec<Vec<String>>) -> Result<Self, Error> {
|
24
|
+
let targets = targets
|
25
|
+
.iter()
|
26
|
+
.map(|t| combine_into_regex_like(t))
|
27
|
+
.collect::<Vec<String>>();
|
28
|
+
|
29
|
+
Self::new(targets)
|
30
|
+
}
|
31
|
+
|
32
|
+
pub fn targets(&self) -> Vec<String> {
|
33
|
+
self.targets
|
34
|
+
.iter()
|
35
|
+
.map(|r| r.as_str().to_string())
|
36
|
+
.collect()
|
37
|
+
}
|
38
|
+
|
39
|
+
pub fn captures_with_context(
|
40
|
+
&self,
|
41
|
+
subject: String,
|
42
|
+
radius: usize,
|
43
|
+
) -> Result<Vec<(String, usize, String)>, Error> {
|
44
|
+
Ok(self
|
45
|
+
.targets
|
46
|
+
.iter()
|
47
|
+
.map(|t| captures_with_context(t, &subject, radius))
|
48
|
+
.collect::<Result<Vec<Vec<(String, usize, String)>>, Error>>()?
|
49
|
+
.into_iter()
|
50
|
+
.flatten()
|
51
|
+
.collect())
|
52
|
+
}
|
53
|
+
|
54
|
+
pub fn captures_without_context(&self, subject: String) -> Result<Vec<(String, usize)>, Error> {
|
55
|
+
Ok(self
|
56
|
+
.targets
|
57
|
+
.iter()
|
58
|
+
.map(|t| captures_without_context(t, &subject))
|
59
|
+
.collect::<Result<Vec<Vec<(String, usize)>>, Error>>()?
|
60
|
+
.into_iter()
|
61
|
+
.flatten()
|
62
|
+
.collect())
|
63
|
+
}
|
64
|
+
}
|
65
|
+
|
66
|
+
fn fuzzy_escape(grapheme: &str) -> String {
|
67
|
+
match grapheme {
|
68
|
+
" " | "\u{00A0}" | "_" | "-" => "[ \u{00A0}_-]?".to_string(),
|
69
|
+
"'" | "‘" | "’" => "['‘’]".to_string(),
|
70
|
+
c => escape(c),
|
71
|
+
}
|
72
|
+
}
|
73
|
+
|
74
|
+
fn combine_into_regex_like(targets: &Vec<String>) -> String {
|
75
|
+
let mut targets = targets
|
76
|
+
.iter()
|
77
|
+
.map(|t| t.graphemes(true).map(|g| fuzzy_escape(g)).collect())
|
78
|
+
.collect::<Vec<String>>();
|
79
|
+
|
80
|
+
targets.sort_unstable_by(|a, b| b.len().cmp(&a.len()));
|
81
|
+
|
82
|
+
format!("(?im:({}))", targets.join("|"))
|
83
|
+
}
|
84
|
+
|
85
|
+
fn regex(target: &str) -> Result<Regex, Error> {
|
86
|
+
RegexBuilder::new(target)
|
87
|
+
.multi_line(true)
|
88
|
+
.build()
|
89
|
+
.map_err(|_| Error::new(exception::arg_error(), format!("Invalid regex: {}", target)))
|
90
|
+
}
|
91
|
+
|
92
|
+
fn captures_with_context(
|
93
|
+
target: &Regex,
|
94
|
+
subject: &str,
|
95
|
+
radius: usize,
|
96
|
+
) -> Result<Vec<(String, usize, String)>, Error> {
|
97
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
98
|
+
|
99
|
+
let captures = captures(target, subject)?;
|
100
|
+
|
101
|
+
captures
|
102
|
+
.iter()
|
103
|
+
.map(|m| {
|
104
|
+
Ok((
|
105
|
+
m.as_str().trim().to_string(),
|
106
|
+
find_char_index(&mut chars_iterator, m.start()),
|
107
|
+
string_range_with_radius(subject, m.start()..m.end(), radius),
|
108
|
+
))
|
109
|
+
})
|
110
|
+
.collect()
|
111
|
+
}
|
112
|
+
|
113
|
+
fn captures_without_context(target: &Regex, subject: &str) -> Result<Vec<(String, usize)>, Error> {
|
114
|
+
let mut chars_iterator = subject.char_indices().enumerate();
|
115
|
+
|
116
|
+
let captures = captures(target, subject)?;
|
117
|
+
|
118
|
+
captures
|
119
|
+
.iter()
|
120
|
+
.map(|m| {
|
121
|
+
Ok((
|
122
|
+
m.as_str().trim().to_string(),
|
123
|
+
find_char_index(&mut chars_iterator, m.start()),
|
124
|
+
))
|
125
|
+
})
|
126
|
+
.collect()
|
127
|
+
}
|
128
|
+
|
129
|
+
/// Returns every non-overlapping match of `target` in `subject`, in the order
/// they occur.
///
/// Only the overall match (capture group 0) is needed, so this uses
/// `find_iter` rather than resolving all capture groups for each match.
///
/// # Errors
///
/// Never fails; the `Result` return type is kept so existing callers that
/// use `?` on it are unaffected.
fn captures<'a>(target: &'a Regex, subject: &'a str) -> Result<Vec<Match<'a>>, Error> {
    Ok(target.find_iter(subject).collect())
}
|
data/lib/xre.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "xre/xre"
|
4
|
+
|
5
|
+
module Xre
  # Ruby-side convenience layer over the native RegexList bindings.
  class RegexList
    # Finds every match of every pattern in +text+.
    #
    # When +context_radius+ is given, each result also carries a snippet of
    # surrounding text of up to that many graphemes on each side; otherwise
    # only the matched text and its character index are returned.
    def captures(text, context_radius: nil)
      if context_radius
        __captures_with_context(text, context_radius)
      else
        __captures_without_context(text)
      end
    end

    # Human-readable representation listing the pattern sources.
    def inspect
      "#<Xre::RegexList targets=#{__targets}>"
    end

    # The raw native bindings are an implementation detail.
    private :__captures_with_context, :__captures_without_context, :__targets
  end
end
|