ahocorasick-rust 2.2.0-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +255 -0
- data/Rakefile +33 -0
- data/docs/match_kind.md +139 -0
- data/docs/reference.md +396 -0
- data/ext/rahocorasick/Cargo.lock +304 -0
- data/ext/rahocorasick/Cargo.toml +13 -0
- data/ext/rahocorasick/extconf.rb +6 -0
- data/ext/rahocorasick/src/lib.rs +191 -0
- data/lib/ahocorasick-rust.rb +9 -0
- data/lib/rahocorasick/2.7/rahocorasick.so +0 -0
- data/lib/rahocorasick/3.0/rahocorasick.so +0 -0
- data/lib/rahocorasick/3.1/rahocorasick.so +0 -0
- data/lib/rahocorasick/3.2/rahocorasick.so +0 -0
- data/lib/rahocorasick/3.3/rahocorasick.so +0 -0
- data/lib/rahocorasick/3.4/rahocorasick.so +0 -0
- metadata +79 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
|
2
|
+
use magnus::{method, function, prelude::*, Error, Ruby, RHash, RArray, Value, Symbol};
|
|
3
|
+
|
|
4
|
+
#[magnus::wrap(class = "AhoCorasickRust")]
|
|
5
|
+
pub struct AhoCorasickRust {
|
|
6
|
+
words: Vec<String>,
|
|
7
|
+
ac: AhoCorasick,
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
impl AhoCorasickRust {
|
|
11
|
+
fn new_impl(ruby: &Ruby, words: Vec<String>, kwargs: Option<RHash>) -> Result<Self, Error> {
|
|
12
|
+
let mut builder = AhoCorasickBuilder::new();
|
|
13
|
+
|
|
14
|
+
// Check for options if kwargs provided
|
|
15
|
+
if let Some(kwargs) = kwargs {
|
|
16
|
+
// case_insensitive option
|
|
17
|
+
if let Some(val) = kwargs.get(ruby.to_symbol("case_insensitive")) {
|
|
18
|
+
if let Ok(case_insensitive) = bool::try_convert(val) {
|
|
19
|
+
if case_insensitive {
|
|
20
|
+
builder.ascii_case_insensitive(true);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
// match_kind option
|
|
26
|
+
if let Some(val) = kwargs.get(ruby.to_symbol("match_kind")) {
|
|
27
|
+
if let Some(sym) = Symbol::from_value(val) {
|
|
28
|
+
let kind_str = sym.name()?.to_string();
|
|
29
|
+
let match_kind = match kind_str.as_ref() {
|
|
30
|
+
"standard" => MatchKind::Standard,
|
|
31
|
+
"leftmost_first" => MatchKind::LeftmostFirst,
|
|
32
|
+
"leftmost_longest" => MatchKind::LeftmostLongest,
|
|
33
|
+
_ => {
|
|
34
|
+
return Err(Error::new(
|
|
35
|
+
ruby.exception_arg_error(),
|
|
36
|
+
format!("Invalid match_kind: '{}'. Valid values are :standard, :leftmost_first, :leftmost_longest", kind_str)
|
|
37
|
+
));
|
|
38
|
+
}
|
|
39
|
+
};
|
|
40
|
+
builder.match_kind(match_kind);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
let ac = builder.build(&words)
|
|
46
|
+
.map_err(|e| Error::new(ruby.exception_runtime_error(), format!("Failed to build automaton: {}", e)))?;
|
|
47
|
+
Ok(Self { words, ac })
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
fn new(ruby: &Ruby, args: &[Value]) -> Result<Self, Error> {
|
|
51
|
+
let args = magnus::scan_args::scan_args::<(Vec<String>,), (), (), (), RHash, ()>(args)?;
|
|
52
|
+
let (words,) = args.required;
|
|
53
|
+
let kwargs = args.keywords;
|
|
54
|
+
|
|
55
|
+
// Only pass kwargs if non-empty
|
|
56
|
+
let kwargs_opt = if kwargs.len() > 0 { Some(kwargs) } else { None };
|
|
57
|
+
|
|
58
|
+
Self::new_impl(ruby, words, kwargs_opt)
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
fn lookup(&self, haystack: String) -> Vec<String> {
|
|
62
|
+
let mut matches = vec![];
|
|
63
|
+
for mat in self.ac.find_iter(&haystack) {
|
|
64
|
+
matches.push(self.words[mat.pattern()].clone());
|
|
65
|
+
}
|
|
66
|
+
matches
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
fn is_match(&self, haystack: String) -> bool {
|
|
70
|
+
self.ac.is_match(&haystack)
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
fn lookup_overlapping(&self, haystack: String) -> Vec<String> {
|
|
74
|
+
let mut matches = vec![];
|
|
75
|
+
for mat in self.ac.find_overlapping_iter(&haystack) {
|
|
76
|
+
matches.push(self.words[mat.pattern()].clone());
|
|
77
|
+
}
|
|
78
|
+
matches
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
fn find_first(&self, haystack: String) -> Option<String> {
|
|
82
|
+
self.ac.find(&haystack).map(|mat| self.words[mat.pattern()].clone())
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
fn find_first_with_position(&self, haystack: String) -> Result<Option<RHash>, Error> {
|
|
86
|
+
let ruby = Ruby::get().unwrap();
|
|
87
|
+
if let Some(mat) = self.ac.find(&haystack) {
|
|
88
|
+
let hash = ruby.hash_new();
|
|
89
|
+
hash.aset(ruby.to_symbol("pattern"), self.words[mat.pattern()].clone())?;
|
|
90
|
+
hash.aset(ruby.to_symbol("start"), mat.start())?;
|
|
91
|
+
hash.aset(ruby.to_symbol("end"), mat.end())?;
|
|
92
|
+
Ok(Some(hash))
|
|
93
|
+
} else {
|
|
94
|
+
Ok(None)
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
fn lookup_with_positions(&self, haystack: String) -> Result<RArray, Error> {
|
|
99
|
+
let ruby = Ruby::get().unwrap();
|
|
100
|
+
let matches = ruby.ary_new();
|
|
101
|
+
for mat in self.ac.find_iter(&haystack) {
|
|
102
|
+
let hash = ruby.hash_new();
|
|
103
|
+
hash.aset(ruby.to_symbol("pattern"), self.words[mat.pattern()].clone())?;
|
|
104
|
+
hash.aset(ruby.to_symbol("start"), mat.start())?;
|
|
105
|
+
hash.aset(ruby.to_symbol("end"), mat.end())?;
|
|
106
|
+
matches.push(hash)?;
|
|
107
|
+
}
|
|
108
|
+
Ok(matches)
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
fn replace_all(&self, args: &[Value]) -> Result<String, Error> {
|
|
112
|
+
let ruby = Ruby::get().unwrap();
|
|
113
|
+
|
|
114
|
+
// Parse arguments: haystack (required), replacements (optional if block given)
|
|
115
|
+
let haystack: String = if args.is_empty() {
|
|
116
|
+
return Err(Error::new(
|
|
117
|
+
ruby.exception_arg_error(),
|
|
118
|
+
"wrong number of arguments (given 0, expected 1..2)"
|
|
119
|
+
));
|
|
120
|
+
} else {
|
|
121
|
+
String::try_convert(args[0])?
|
|
122
|
+
};
|
|
123
|
+
|
|
124
|
+
// Check if a block was given
|
|
125
|
+
match ruby.block_proc() {
|
|
126
|
+
Ok(proc) => {
|
|
127
|
+
// Block-based replacement
|
|
128
|
+
let mut result = haystack.clone();
|
|
129
|
+
let mut offset: isize = 0;
|
|
130
|
+
|
|
131
|
+
for mat in self.ac.find_iter(&haystack) {
|
|
132
|
+
let pattern = &self.words[mat.pattern()];
|
|
133
|
+
let replacement: String = proc.call((pattern.clone(),))?;
|
|
134
|
+
|
|
135
|
+
let start = (mat.start() as isize + offset) as usize;
|
|
136
|
+
let end = (mat.end() as isize + offset) as usize;
|
|
137
|
+
|
|
138
|
+
result.replace_range(start..end, &replacement);
|
|
139
|
+
offset += replacement.len() as isize - (mat.end() - mat.start()) as isize;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
Ok(result)
|
|
143
|
+
}
|
|
144
|
+
Err(_) if args.len() >= 2 => {
|
|
145
|
+
// Hash-based replacement
|
|
146
|
+
if let Some(hash) = RHash::from_value(args[1]) {
|
|
147
|
+
let mut replace_with: Vec<String> = Vec::with_capacity(self.words.len());
|
|
148
|
+
|
|
149
|
+
for word in &self.words {
|
|
150
|
+
if let Some(val) = hash.get(word.clone()) {
|
|
151
|
+
if let Ok(replacement) = String::try_convert(val) {
|
|
152
|
+
replace_with.push(replacement);
|
|
153
|
+
} else {
|
|
154
|
+
replace_with.push(word.clone());
|
|
155
|
+
}
|
|
156
|
+
} else {
|
|
157
|
+
replace_with.push(word.clone());
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
Ok(self.ac.replace_all(&haystack, &replace_with))
|
|
162
|
+
} else {
|
|
163
|
+
Err(Error::new(
|
|
164
|
+
ruby.exception_arg_error(),
|
|
165
|
+
"replace_all requires a Hash or block"
|
|
166
|
+
))
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
Err(_) => {
|
|
170
|
+
Err(Error::new(
|
|
171
|
+
ruby.exception_arg_error(),
|
|
172
|
+
"replace_all requires a Hash or block"
|
|
173
|
+
))
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
#[magnus::init]
|
|
180
|
+
fn main(ruby: &Ruby) -> Result<(), Error> {
|
|
181
|
+
let class = ruby.define_class("AhoCorasickRust", ruby.class_object())?;
|
|
182
|
+
class.define_singleton_method("new", function!(AhoCorasickRust::new, -1))?;
|
|
183
|
+
class.define_method("lookup", method!(AhoCorasickRust::lookup, 1))?;
|
|
184
|
+
class.define_method("match?", method!(AhoCorasickRust::is_match, 1))?;
|
|
185
|
+
class.define_method("lookup_overlapping", method!(AhoCorasickRust::lookup_overlapping, 1))?;
|
|
186
|
+
class.define_method("find_first", method!(AhoCorasickRust::find_first, 1))?;
|
|
187
|
+
class.define_method("find_first_with_position", method!(AhoCorasickRust::find_first_with_position, 1))?;
|
|
188
|
+
class.define_method("lookup_with_positions", method!(AhoCorasickRust::lookup_with_positions, 1))?;
|
|
189
|
+
class.define_method("replace_all", method!(AhoCorasickRust::replace_all, -1))?;
|
|
190
|
+
Ok(())
|
|
191
|
+
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
metadata
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: ahocorasick-rust
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 2.2.0
|
|
5
|
+
platform: x86_64-linux-gnu
|
|
6
|
+
authors:
|
|
7
|
+
- Eric
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-01-13 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rb_sys
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: 0.9.117
|
|
20
|
+
type: :runtime
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: 0.9.117
|
|
27
|
+
description: A Ruby gem wrapping the legendary Rust Aho-Corasick algorithm! Aho-Corasick
|
|
28
|
+
is a powerful string searching algorithm that finds multiple patterns simultaneously
|
|
29
|
+
in a text. Features include overlapping matches, case-insensitive search, find &
|
|
30
|
+
replace, match positions, and configurable match strategies. Perfect for content
|
|
31
|
+
filtering, tokenization, and multi-pattern search at lightning speed! (ノ◕ヮ◕)ノ*:・゚✧
|
|
32
|
+
email:
|
|
33
|
+
- eric@ebj.dev
|
|
34
|
+
executables: []
|
|
35
|
+
extensions: []
|
|
36
|
+
extra_rdoc_files: []
|
|
37
|
+
files:
|
|
38
|
+
- README.md
|
|
39
|
+
- Rakefile
|
|
40
|
+
- docs/match_kind.md
|
|
41
|
+
- docs/reference.md
|
|
42
|
+
- ext/rahocorasick/Cargo.lock
|
|
43
|
+
- ext/rahocorasick/Cargo.toml
|
|
44
|
+
- ext/rahocorasick/extconf.rb
|
|
45
|
+
- ext/rahocorasick/src/lib.rs
|
|
46
|
+
- lib/ahocorasick-rust.rb
|
|
47
|
+
- lib/rahocorasick/2.7/rahocorasick.so
|
|
48
|
+
- lib/rahocorasick/3.0/rahocorasick.so
|
|
49
|
+
- lib/rahocorasick/3.1/rahocorasick.so
|
|
50
|
+
- lib/rahocorasick/3.2/rahocorasick.so
|
|
51
|
+
- lib/rahocorasick/3.3/rahocorasick.so
|
|
52
|
+
- lib/rahocorasick/3.4/rahocorasick.so
|
|
53
|
+
homepage: https://github.com/jetpks/ahocorasick-rust-ruby
|
|
54
|
+
licenses:
|
|
55
|
+
- MIT
|
|
56
|
+
metadata: {}
|
|
57
|
+
post_install_message:
|
|
58
|
+
rdoc_options: []
|
|
59
|
+
require_paths:
|
|
60
|
+
- lib
|
|
61
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
62
|
+
requirements:
|
|
63
|
+
- - ">="
|
|
64
|
+
- !ruby/object:Gem::Version
|
|
65
|
+
version: '2.7'
|
|
66
|
+
- - "<"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: 3.5.dev
|
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
- - ">="
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: 3.3.22
|
|
74
|
+
requirements: []
|
|
75
|
+
rubygems_version: 3.5.23
|
|
76
|
+
signing_key:
|
|
77
|
+
specification_version: 4
|
|
78
|
+
summary: Blazing-fast ✨ Ruby wrapper for the Rust Aho-Corasick string matching algorithm!
|
|
79
|
+
test_files: []
|